Gummy一句话识别、翻译Java SDK

Gummy一句话识别、翻译Java SDK

流式调用通过实现回调接口的方式流式输出实时识别结果。

启动流式语音识别/翻译

实例化TranslationRecognizerChat类,调用call方法绑定请求参数和回调接口(ResultCallback)并启动流式语音识别/翻译。

流式传输

循环调用TranslationRecognizerChat类的sendAudioFrame方法,将从本地文件或设备(如麦克风)读取的二进制音频流分段发送至服务端。

在发送音频数据的过程中,服务端会通过回调接口(ResultCallback)的onEvent方法,将识别结果实时返回给客户端。

建议每次发送的音频时长约为100毫秒,数据大小保持在1KB至16KB之间。

当识别到一句话后,sendAudioFrame方法返回false,音频数据无法再继续上传。此时请终止循环。

结束处理

调用TranslationRecognizerChat类的stop方法结束语音识别/翻译。

该方法会阻塞当前线程,直到回调接口(ResultCallback)的onComplete或者onError回调触发后才会释放线程阻塞。

点击查看完整示例

识别传入麦克风的语音

package org.alibaba.bailian.example.examples;

import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerChat;

import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerParam;

import com.alibaba.dashscope.audio.asr.translation.results.TranscriptionResult;

import com.alibaba.dashscope.audio.asr.translation.results.TranslationRecognizerResult;

import com.alibaba.dashscope.common.ResultCallback;

import com.alibaba.dashscope.exception.NoApiKeyException;

import javax.sound.sampled.AudioFormat;

import javax.sound.sampled.AudioSystem;

import javax.sound.sampled.LineUnavailableException;

import javax.sound.sampled.TargetDataLine;

import java.nio.ByteBuffer;

public class Main {

/**
 * Streams audio captured from the default microphone to the Gummy one-sentence
 * recognition/translation service and prints interim and final results.
 *
 * <p>Recording runs for at most 50 seconds, or until {@code sendAudioFrame}
 * returns {@code false}, which this sample treats as "sentence recognized,
 * no more audio accepted".
 *
 * @param args unused
 * @throws NoApiKeyException declared for API compatibility; failures inside the body are caught and printed
 * @throws InterruptedException declared for API compatibility
 * @throws LineUnavailableException declared for API compatibility
 */
public static void main(String[] args) throws NoApiKeyException, InterruptedException, LineUnavailableException {
    // Create the recognizer.
    TranslationRecognizerChat translator = new TranslationRecognizerChat();
    // Build the request parameters.
    TranslationRecognizerParam param =
        TranslationRecognizerParam.builder()
            // If the API key is not configured as an environment variable,
            // replace your-api-key with your own API key.
            // .apiKey("your-api-key")
            .model("gummy-chat-v1") // model name
            // Audio format. Supported: pcm, pcm-encoded wav, mp3, opus in ogg, speex in ogg, aac, amr.
            .format("pcm")
            .sampleRate(16000) // sample rate in Hz; only 16000 Hz is supported
            .transcriptionEnabled(true) // enable real-time recognition
            .translationEnabled(true) // enable real-time translation
            .translationLanguages(new String[] {"en"}) // translation target languages
            .build();

    // Declared outside the try block so the finally block can release the device.
    TargetDataLine targetDataLine = null;
    try {
        // The explicit type argument is required: with a raw ResultCallback the
        // onEvent(TranslationRecognizerResult) override does not compile.
        translator.call(param, new ResultCallback<TranslationRecognizerResult>() {
            @Override
            public void onEvent(TranslationRecognizerResult result) {
                try {
                    System.out.println("RequestId: " + result.getRequestId());
                    // Transcription and translation are handled independently so that an
                    // event carrying only one of them is still printed.
                    if (result.getTranscriptionResult() != null) {
                        TranscriptionResult transcriptionResult = result.getTranscriptionResult();
                        System.out.println("Transcription Result:");
                        if (result.isSentenceEnd()) {
                            System.out.println("\tFix:" + transcriptionResult.getText());
                        } else {
                            System.out.println("\tTemp Result:" + transcriptionResult.getText());
                            if (transcriptionResult.isVadPreEnd()) {
                                System.out.printf("VadPreEnd: start:%d, end:%d, time:%d\n", transcriptionResult.getPreEndStartTime(), transcriptionResult.getPreEndEndTime(), transcriptionResult.getPreEndTimemillis());
                            }
                        }
                    }
                    if (result.getTranslationResult() != null) {
                        System.out.println("English Translation Result:");
                        if (result.isSentenceEnd()) {
                            System.out.println("\tFix:" + result.getTranslationResult().getTranslation("en").getText());
                        } else {
                            System.out.println("\tTemp Result:" + result.getTranslationResult().getTranslation("en").getText());
                        }
                    }
                } catch (Exception e) {
                    // Keep the callback from propagating printing failures
                    // (for example a missing "en" translation) to the SDK.
                    e.printStackTrace();
                }
            }

            @Override
            public void onComplete() {
                System.out.println("Translation complete");
            }

            @Override
            public void onError(Exception e) {
                // Report the failure instead of silently dropping it.
                e.printStackTrace();
                System.out.println("TranslationCallback error: " + e.getMessage());
            }
        });

        // 16 kHz, 16-bit, mono, signed, little-endian PCM.
        AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
        // Pick the default recording device that matches the format.
        targetDataLine = AudioSystem.getTargetDataLine(audioFormat);
        targetDataLine.open(audioFormat);
        // Start recording.
        targetDataLine.start();
        System.out.println("请您通过麦克风讲话体验一句话语音识别和翻译功能");

        ByteBuffer buffer = ByteBuffer.allocate(1024);
        long start = System.currentTimeMillis();
        // Record for up to 50 seconds (50000 ms) while recognizing in real time.
        while (System.currentTimeMillis() - start < 50000) {
            int read = targetDataLine.read(buffer.array(), 0, buffer.capacity());
            if (read > 0) {
                buffer.limit(read);
                // Send the captured audio to the streaming recognition service.
                if (!translator.sendAudioFrame(buffer)) {
                    System.out.println("sentence end, stop sending");
                    break;
                }
                // Use a fresh buffer for the next frame rather than reusing the one just sent.
                buffer = ByteBuffer.allocate(1024);
                // The capture rate is limited; sleep briefly to avoid busy-waiting.
                Thread.sleep(20);
            }
        }
        translator.stop();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            // Release the microphone so the capture device is not leaked.
            if (targetDataLine != null) {
                targetDataLine.stop();
                targetDataLine.close();
            }
        } finally {
            // Close the websocket connection once the task is over.
            translator.getDuplexApi().close(1000, "bye");
        }
    }
    System.exit(0);
}

}

识别本地语音文件

示例中用到的音频为:hello_world.wav。

import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerChat;

import com.alibaba.dashscope.audio.asr.translation.TranslationRecognizerParam;

import com.alibaba.dashscope.audio.asr.translation.results.TranslationRecognizerResult;

import com.alibaba.dashscope.common.ResultCallback;

import com.alibaba.dashscope.exception.NoApiKeyException;

import java.io.FileInputStream;

import java.nio.ByteBuffer;

import java.nio.file.Path;

import java.nio.file.Paths;

import java.time.LocalDateTime;

import java.util.concurrent.ExecutorService;

import java.util.concurrent.Executors;

import java.util.concurrent.TimeUnit;

/**
 * Runnable that streams one local audio file to the Gummy one-sentence
 * recognition/translation service and prints interim and final results.
 */
class RealtimeTranslateChatTask implements Runnable {
    /** Audio file to send. */
    private final Path filepath;
    /** Recognizer instance, created lazily in {@link #run()}. */
    private TranslationRecognizerChat translator = null;

    /**
     * @param filepath path of the audio file to recognize and translate
     */
    public RealtimeTranslateChatTask(Path filepath) {
        this.filepath = filepath;
    }

    /**
     * Starts a recognition session, sends the file in chunks, then stops the
     * session and closes the websocket connection. Exceptions are printed, not rethrown.
     */
    @Override
    public void run() {
        // Build the request parameters.
        TranslationRecognizerParam param =
            TranslationRecognizerParam.builder()
                // If the API key is not configured as an environment variable,
                // replace your-api-key with your own API key.
                // .apiKey("your-api-key")
                .model("gummy-chat-v1") // model name
                // Audio format. Supported: pcm, pcm-encoded wav, mp3, opus in ogg, speex in ogg, aac, amr.
                .format("wav")
                .sampleRate(16000) // sample rate in Hz; only 16000 Hz is supported
                .transcriptionEnabled(true) // enable real-time recognition
                .translationEnabled(true) // enable real-time translation
                .translationLanguages(new String[] {"en"}) // translation target languages
                .build();

        if (translator == null) {
            // Create the streaming recognition service client.
            translator = new TranslationRecognizerChat();
        }

        String threadName = Thread.currentThread().getName();

        // The explicit type argument is required: with a raw ResultCallback the
        // onEvent(TranslationRecognizerResult) override does not compile.
        ResultCallback<TranslationRecognizerResult> callback =
            new ResultCallback<TranslationRecognizerResult>() {
                @Override
                public void onEvent(TranslationRecognizerResult result) {
                    System.out.println("RequestId: " + result.getRequestId());
                    // Print interim ("Temp Result") and final ("Fix") results.
                    if (result.getTranscriptionResult() != null) {
                        System.out.println("Transcription Result:"+result);
                        if (result.isSentenceEnd()) {
                            System.out.println("\tFix:" + result.getTranscriptionResult().getText());
                        } else {
                            System.out.println("\tTemp Result:" + result.getTranscriptionResult().getText());
                        }
                    }
                    if (result.getTranslationResult() != null) {
                        System.out.println("English Translation Result:");
                        if (result.isSentenceEnd()) {
                            System.out.println("\tFix:" + result.getTranslationResult().getTranslation("en").getText());
                        } else {
                            System.out.println("\tTemp Result:" + result.getTranslationResult().getTranslation("en").getText());
                        }
                    }
                }

                @Override
                public void onComplete() {
                    System.out.println("[" + threadName + "] Translation complete");
                }

                @Override
                public void onError(Exception e) {
                    e.printStackTrace();
                    System.out.println("[" + threadName + "] TranslationCallback error: " + e.getMessage());
                }
            };

        try {
            // Start streaming recognition/translation with the parameters and callback.
            translator.call(param, callback);
            // Replace with your own file path.
            System.out.println("[" + threadName + "] Input file_path is: " + this.filepath);
            // Read the file and send the audio chunk by chunk.
            // try-with-resources closes the stream; no explicit close() is needed.
            try (FileInputStream fis = new FileInputStream(this.filepath.toFile())) {
                // 3200 bytes is roughly 100 ms of audio, assuming 16 kHz 16-bit mono PCM.
                byte[] buffer = new byte[3200];
                int bytesRead;
                while ((bytesRead = fis.read(buffer)) != -1) {
                    System.out.println("[" + threadName + "] bytesRead: " + bytesRead);
                    // Expose only the bytes actually read; this also covers a short final chunk.
                    ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead);
                    // Send the chunk; false means the sentence ended and no more audio is accepted.
                    if (!translator.sendAudioFrame(byteBuffer)) {
                        System.out.println("sentence end, stop sending");
                        break;
                    }
                    // Use a fresh array for the next chunk rather than overwriting the one just sent.
                    buffer = new byte[3200];
                    // Pace the upload at roughly real-time speed.
                    Thread.sleep(100);
                }
                System.out.println(LocalDateTime.now());
            } catch (Exception e) {
                e.printStackTrace();
            }
            // Signal the end of the audio stream.
            translator.stop();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the websocket connection once the task is over.
            if (translator != null) {
                translator.getDuplexApi().close(1000, "bye");
            }
        }
    }
}

/**
 * Entry point of the local-file example: submits one
 * {@code RealtimeTranslateChatTask} per audio file to a thread pool and waits
 * for the tasks to finish.
 */
public class Main {
    /**
     * @param args unused
     * @throws NoApiKeyException declared for API compatibility
     * @throws InterruptedException if the wait for task completion is interrupted
     */
    public static void main(String[] args)
            throws NoApiKeyException, InterruptedException {
        String currentDir = System.getProperty("user.dir");
        // Please replace the path with your audio source
        Path[] filePaths = {
            Paths.get(currentDir, "hello_world.wav"),
            // Paths.get(currentDir, "hello_world_male_16k_16bit_mono.wav"),
        };
        // Use a thread pool to run the recognition tasks.
        ExecutorService executorService = Executors.newFixedThreadPool(10);
        try {
            for (Path filepath : filePaths) {
                executorService.submit(new RealtimeTranslateChatTask(filepath));
            }
        } finally {
            // Stop accepting new tasks even if a submission failed, so the pool can terminate.
            executorService.shutdown();
        }
        // Wait for all tasks to complete; on timeout, report it and interrupt
        // the remaining workers instead of leaving them running silently.
        if (!executorService.awaitTermination(1, TimeUnit.MINUTES)) {
            System.out.println("Recognition tasks did not finish within 1 minute, cancelling");
            executorService.shutdownNow();
        }
        // System.exit(0);
    }
}

相关文章

河池市是哪个省
beat365手机版官方

河池市是哪个省

📅 07-25 👁️ 1700
简单又好听微信网名
365平台赌博

简单又好听微信网名

📅 07-17 👁️ 7892
为什么许世友没能获得大将军衔?
beat365手机版官方

为什么许世友没能获得大将军衔?

📅 09-22 👁️ 1568