流式调用通过实现回调接口的方式流式输出实时识别结果。
启动流式语音识别/翻译
实例化TranslationRecognizerChat类,调用call方法绑定请求参数和回调接口(ResultCallback)并启动流式语音识别/翻译。
流式传输
循环调用TranslationRecognizerChat类的sendAudioFrame方法,将从本地文件或设备(如麦克风)读取的二进制音频流分段发送至服务端。
在发送音频数据的过程中,服务端会通过回调接口(ResultCallback)的onEvent方法,将识别结果实时返回给客户端。
建议每次发送的音频时长约为100毫秒,数据大小保持在1KB至16KB之间。
当识别到一句话后,sendAudioFrame方法返回false,音频数据无法再继续上传。此时请终止循环。
结束处理
调用TranslationRecognizerChat类的stop方法结束语音识别/翻译。
该方法会阻塞当前线程,直到回调接口(ResultCallback)的onComplete或者onError回调触发后才会释放线程阻塞。
点击查看完整示例
识别传入麦克风的语音
package org.alibaba.bailian.example.examples;
import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerChat;
import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerParam;
import com.alibaba.dashscope.audio.asr.translation.results.TranscriptionResult;
import com.alibaba.dashscope.audio.asr.translation.results.TranslationRecognizerResult;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.exception.NoApiKeyException;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;
import java.nio.ByteBuffer;
public class Main {
public static void main(String[] args) throws NoApiKeyException, InterruptedException, LineUnavailableException {
// 创建Recognizer
TranslationRecognizerChat translator = new TranslationRecognizerChat();
// 初始化请求参数
TranslationRecognizerParam param =
TranslationRecognizerParam.builder()
// 若没有将API Key配置到环境变量中,需将your-api-key替换为自己的API Key
// .apiKey("your-api-key")
.model("gummy-chat-v1") // 设置模型名
.format("pcm") // 设置待识别音频格式,支持的音频格式:pcm、pcm编码的wav、mp3、ogg封装的opus、ogg封装的speex、aac、amr
.sampleRate(16000) // 设置待识别音频采样率(单位Hz)。仅支持16000Hz采样率。
.transcriptionEnabled(true) // 设置是否开启实时识别
.translationEnabled(true) // 设置是否开启实时翻译
.translationLanguages(new String[] {"en"}) // 设置翻译目标语言
.build();
try {
translator.call(param, new ResultCallback
@Override
public void onEvent(TranslationRecognizerResult result) {
if (result.getTranscriptionResult() == null) {
return;
}
try {
System.out.println("RequestId: " + result.getRequestId());
// 打印最终结果
if (result.getTranscriptionResult() != null) {
System.out.println("Transcription Result:");
if (result.isSentenceEnd()) {
System.out.println("\tFix:" + result.getTranscriptionResult().getText());
} else {
TranscriptionResult transcriptionResult = result.getTranscriptionResult();
System.out.println("\tTemp Result:" + transcriptionResult.getText());
if (result.getTranscriptionResult().isVadPreEnd()) {
System.out.printf("VadPreEnd: start:%d, end:%d, time:%d\n", transcriptionResult.getPreEndStartTime(), transcriptionResult.getPreEndEndTime(), transcriptionResult.getPreEndTimemillis());
}
}
}
if (result.getTranslationResult() != null) {
System.out.println("English Translation Result:");
if (result.isSentenceEnd()) {
System.out.println("\tFix:" + result.getTranslationResult().getTranslation("en").getText());
} else {
System.out.println("\tTemp Result:" + result.getTranslationResult().getTranslation("en").getText());
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void onComplete() {
System.out.println("Translation complete");
}
@Override
public void onError(Exception e) {
}
});
// 创建音频格式
AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
// 根据格式匹配默认录音设备
TargetDataLine targetDataLine =
AudioSystem.getTargetDataLine(audioFormat);
targetDataLine.open(audioFormat);
// 开始录音
targetDataLine.start();
System.out.println("请您通过麦克风讲话体验一句话语音识别和翻译功能");
ByteBuffer buffer = ByteBuffer.allocate(1024);
long start = System.currentTimeMillis();
// 录音5s并进行实时识别
while (System.currentTimeMillis() - start < 50000) {
int read = targetDataLine.read(buffer.array(), 0, buffer.capacity());
if (read > 0) {
buffer.limit(read);
// 将录音音频数据发送给流式识别服务
if (!translator.sendAudioFrame(buffer)) {
System.out.println("sentence end, stop sending");
break;
}
buffer = ByteBuffer.allocate(1024);
// 录音速率有限,防止cpu占用过高,休眠一小会儿
Thread.sleep(20);
}
}
translator.stop();
} catch (Exception e) {
e.printStackTrace();
} finally {
// 任务结束关闭 websocket 连接
translator.getDuplexApi().close(1000, "bye");
}
System.exit(0);
}
}
识别本地语音文件
示例中用到的音频为:hello_world.wav。
import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerChat;
import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerParam;
import com.alibaba.dashscope.audio.asr.translation.results.TranslationRecognizerResult;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.exception.NoApiKeyException;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
class RealtimeTranslateChatTask implements Runnable {
private Path filepath;
private TranslationRecognizerChat translator = null;
public RealtimeTranslateChatTask(Path filepath) {
this.filepath = filepath;
}
@Override
public void run() {
for (int i=0; i<1; i++) {
// 初始化请求参数
TranslationRecognizerParam param =
TranslationRecognizerParam.builder()
// 若没有将API Key配置到环境变量中,需将your-api-key替换为自己的API Key
// .apiKey("your-api-key")
.model("gummy-chat-v1") // 设置模型名
.format("wav") // 设置待识别音频格式,支持的音频格式:pcm、pcm编码的wav、mp3、ogg封装的opus、ogg封装的speex、aac、amr
.sampleRate(16000) // 设置待识别音频采样率(单位Hz)。只支持16000Hz的采样率。
.transcriptionEnabled(true) // 设置是否开启实时识别
.translationEnabled(true) // 设置是否开启实时翻译
.translationLanguages(new String[] {"en"}) // 设置翻译目标语言
.build();
if (translator == null) {
// 初始化流式识别服务
translator = new TranslationRecognizerChat();
}
String threadName = Thread.currentThread().getName();
// 初始化回调接口
ResultCallback
new ResultCallback
@Override
public void onEvent(TranslationRecognizerResult result) {
System.out.println("RequestId: " + result.getRequestId());
// 打印最终结果
if (result.getTranscriptionResult() != null) {
System.out.println("Transcription Result:"+result);
if (result.isSentenceEnd()) {
System.out.println("\tFix:" + result.getTranscriptionResult().getText());
} else {
System.out.println("\tTemp Result:" + result.getTranscriptionResult().getText());
}
}
if (result.getTranslationResult() != null) {
System.out.println("English Translation Result:");
if (result.isSentenceEnd()) {
System.out.println("\tFix:" + result.getTranslationResult().getTranslation("en").getText());
} else {
System.out.println("\tTemp Result:" + result.getTranslationResult().getTranslation("en").getText());
}
}
}
@Override
public void onComplete() {
System.out.println("[" + threadName + "] Translation complete");
}
@Override
public void onError(Exception e) {
e.printStackTrace();
System.out.println("[" + threadName + "] TranslationCallback error: " + e.getMessage());
}
};
try {
// 启动流式语音识别/翻译,绑定请求参数和回调接口
translator.call(param, callback);
// 替换成您自己的文件路径
System.out.println("[" + threadName + "] Input file_path is: " + this.filepath);
// Read file and send audio by chunks
try (FileInputStream fis = new FileInputStream(this.filepath.toFile())) {
// chunk size set to 1 seconds for 16KHz sample rate
byte[] buffer = new byte[3200];
int bytesRead;
// Loop to read chunks of the file
while ((bytesRead = fis.read(buffer)) != -1) {
ByteBuffer byteBuffer;
// Handle the last chunk which might be smaller than the buffer size
System.out.println("[" + threadName + "] bytesRead: " + bytesRead);
if (bytesRead < buffer.length) {
byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead);
} else {
byteBuffer = ByteBuffer.wrap(buffer);
}
// Send the ByteBuffer to the translation instance
if (!translator.sendAudioFrame(byteBuffer)) {
System.out.println("sentence end, stop sending");
break;
}
buffer = new byte[3200];
Thread.sleep(100);
}
fis.close();
System.out.println(LocalDateTime.now());
} catch (Exception e) {
e.printStackTrace();
}
// 通知结束
translator.stop();
} catch (Exception e) {
e.printStackTrace();
} finally {
// 任务结束关闭 websocket 连接
if (translator != null) {
translator.getDuplexApi().close(1000, "bye");
}
}
}
}
}
public class Main {
    public static void main(String[] args)
            throws NoApiKeyException, InterruptedException {
        // Resolve audio files relative to the working directory.
        // Replace these paths with your own audio sources.
        String workingDir = System.getProperty("user.dir");
        Path[] audioFiles = {
            Paths.get(workingDir, "hello_world.wav"),
            // Paths.get(workingDir, "hello_world_male_16k_16bit_mono.wav"),
        };
        // Run each recognition task on a fixed-size thread pool.
        ExecutorService pool = Executors.newFixedThreadPool(10);
        for (Path audioFile : audioFiles) {
            pool.submit(new RealtimeTranslateChatTask(audioFile));
        }
        pool.shutdown();
        // Block until every submitted task finishes (up to one minute).
        pool.awaitTermination(1, TimeUnit.MINUTES);
        // System.exit(0);
    }
}