在Java中实现在线语音识别

我会带着你远行 2021-09-22 10:58 396阅读 0赞

#### ——利用讯飞开发平台作为第三方库 ####

首先需要在讯飞开发平台下载SDK，网址为[讯飞开发平台][Link 1]。这些SDK的下载都是免费的，当然你需要先注册。SDK中不仅包含相应的jar包，还有一些相应的demo，可以供你参考学习。

![讯飞开发平台][1240]

在我们下载下来第一个SDK 之后就可以进行开发了，讯飞的SDK 给我们提供了详尽而强大的函数支持，下面我就从代码的角度来进行一些解释。

#### 代码 ####

    package myVoice;
    
    import java.awt.Button;
    
    import java.awt.Font;
    
    import java.awt.Frame;
    
    import java.awt.GridLayout;
    
    import java.awt.Panel;
    
    import java.awt.TextArea;
    
    import java.awt.event.ActionEvent;
    
    import java.awt.event.ActionListener;
    
    import java.lang.reflect.Parameter;
    
    import java.util.ArrayList;
    
    import javax.swing.ImageIcon;
    
    import javax.swing.JFrame;
    
    import javax.swing.JLabel;
    
    import com.iflytek.cloud.speech.RecognizerListener;
    
    import com.iflytek.cloud.speech.RecognizerResult;
    
    import com.iflytek.cloud.speech.SpeechError;
    
    import com.iflytek.cloud.speech.SpeechRecognizer;
    
    import com.iflytek.cloud.speech.SpeechUtility;
    
    import com.iflytek.util.DebugLog;
    
    import com.iflytek.util.JsonParser;
    
    import com.iflytek.util.Version;
    
    /**
     * Minimal AWT window that demonstrates online speech dictation with the iFlytek SDK.
     *
     * <p>The window shows a "start" and a "stop" button plus a text area. Pressing "start"
     * begins a dictation session (or stops it when one is already running); recognized text
     * delivered to {@link #recognizerListener} is appended to the text area.
     *
     * <p>{@code SpeechUtility.createUtility(...)} must be called before constructing this
     * class; {@link #main(String[])} does so.
     */
    public class VoiceSpeech extends Frame implements ActionListener {

        private static final String DEF_FONT_NAME = "宋体";

        private static final int DEF_FONT_STYLE = Font.BOLD;

        private static final int DEF_FONT_SIZE = 30;

        /** Number of characters after which the text-area font shrinks by one more step. */
        private static final int TEXT_COUNT = 100;

        Button startBtn;

        Button stopBtn;

        TextArea textArea;

        /** Speech dictation object obtained from the SDK. */
        SpeechRecognizer speechRecognize;

        /** Builds the UI, wires the button listeners and shows the window. */
        public VoiceSpeech() {
            // Initialize the dictation object.
            speechRecognize = SpeechRecognizer.createRecognizer();

            // Create the components and register this frame as their click listener.
            startBtn = new Button("start");
            stopBtn = new Button("stop");
            textArea = new TextArea();
            startBtn.addActionListener(this);
            stopBtn.addActionListener(this);

            Panel btnPanel = new Panel();
            btnPanel.add(startBtn);
            btnPanel.add(stopBtn);

            Panel textPanel = new Panel();
            textPanel.add(textArea);

            // Install the layout manager before adding children so they are laid out by it
            // from the start.
            setLayout(new GridLayout(2, 1));
            add(btnPanel);
            add(textPanel);

            // An AWT Frame ignores its close button unless a listener handles it.
            // Fully-qualified names are used so no extra import is required.
            addWindowListener(new java.awt.event.WindowAdapter() {
                @Override
                public void windowClosing(java.awt.event.WindowEvent e) {
                    if (speechRecognize != null && speechRecognize.isListening()) {
                        speechRecognize.stopListening();
                    }
                    VoiceSpeech.this.dispose();
                    System.exit(0);
                }
            });

            // Configure and show the window.
            setSize(400, 300);
            setTitle("语音识别");
            setLocation(200, 200);
            setVisible(true);
        }

        /**
         * Handles clicks on the two buttons.
         *
         * <p>"start" resets the text area and toggles the session: it starts listening when
         * idle and stops listening when a session is already active. "stop" always stops
         * listening.
         *
         * @param e the button event
         */
        @Override
        public void actionPerformed(ActionEvent e) {
            Object source = e.getSource();
            if (source == startBtn) {
                textArea.setText("*************你说的是：");
                if (speechRecognize.isListening()) {
                    speechRecognize.stopListening();
                } else {
                    speechRecognize.startListening(recognizerListener);
                }
            } else if (source == stopBtn) {
                speechRecognize.stopListening();
            }
        }

        /** Dictation listener passed to {@code startListening}; receives the SDK callbacks. */
        private RecognizerListener recognizerListener = new RecognizerListener() {

            public void onBeginOfSpeech() {
                // Nothing to update in this minimal UI.
            }

            public void onEndOfSpeech() {
                DebugLog.Log("onEndOfSpeech enter");
            }

            /**
             * Receives a dictation result, parses its JSON payload with the SDK sample's
             * {@code JsonParser}, appends the text to the text area and re-fits the font.
             */
            public void onResult(RecognizerResult results, boolean islast) {
                DebugLog.Log("onResult enter");

                String parsed = JsonParser.parseIatResult(results.getResultString());
                if (parsed != null) {
                    // Skip null so the literal "null" is never shown to the user.
                    textArea.append(parsed);
                }

                String shown = textArea.getText();
                if (shown != null) {
                    Font newFont = fontForLength(shown.length());
                    DebugLog.Log("onResult new font size=" + newFont.getSize());
                    textArea.setFont(newFont);
                }

                if (islast) {
                    iatSpeechInitUI();
                }
            }

            public void onVolumeChanged(int volume) {
                // No volume indicator in this window, so the value itself is not used.
                DebugLog.Log("onVolumeChanged enter");
            }

            public void onError(SpeechError error) {
                DebugLog.Log("onError enter");
                if (error != null) {
                    DebugLog.Log("onError Code：" + error.getErrorCode());
                    textArea.setText(error.getErrorDescription(true));
                    iatSpeechInitUI();
                }
            }

            public void onEvent(int eventType, int arg1, int agr2, String msg) {
                DebugLog.Log("onEvent enter");
            }
        };

        /**
         * Chooses the text-area font for the given amount of displayed text.
         *
         * <p>The size starts from {@code DEF_FONT_SIZE} and drops by 2 for every started
         * block of {@code TEXT_COUNT} characters, never going below 10. The default (bold)
         * style is used while the text fits in the first block, plain afterwards.
         *
         * @param length number of characters currently displayed
         * @return the font to apply
         */
        private static Font fontForLength(int length) {
            int n = length / TEXT_COUNT + 1;
            int fontSize = Math.max(10, DEF_FONT_SIZE - 2 * n);
            // Fix: the original passed DEF_FONT_SIZE here; 30 is not a valid style bitmask,
            // so java.awt.Font fell back to PLAIN and bold was never applied.
            int style = n > 1 ? Font.PLAIN : DEF_FONT_STYLE;
            return new Font(DEF_FONT_NAME, style, fontSize);
        }

        /**
         * Called when dictation ends (last result or error) to restore the initial UI state.
         * This minimal window has no state to restore, so the method is intentionally empty.
         */
        public void iatSpeechInitUI() {
        }

        /**
         * Initializes the SDK with the application id and opens the window.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            // SDK initialization; must happen before any recognizer is created.
            SpeechUtility.createUtility("appid=" + Version.getAppid());
            new VoiceSpeech();
        }
    }

#### 代码解析 ####

1.SpeechRecognizer类是语音识别类，提供听写、语法识别等功能。本类使用单例，调用者只需要通过createRecognizer()创建一次对象，便可一直使用该对象，直到调用destroy()销毁该单例对象。调用者可通过getRecognizer()获取当前已经创建的单例。我们在一开始导包，把相应的类导入，然后声明语音识别类的对象，并在VoiceSpeech类的构造器中初始化。

2.在SpeechRecognizer类中有很多有关语音识别的方法，

（1）startListening方法，开始进行语音识别。该方法的参数是一个回调对象，即另一个类RecognizerListener的实例。我们在其匿名内部类中重写关键的方法，借此得到我们想要的结果：在onResult方法中，把识别的结果通过json解析之后（识别的结果默认是json格式）依次添加到文本栏上面，之后再对文本栏的内容进行字体、字号等的设定。

（2）stopListening方法，等录音结束之后，调用该方法，把录音结果通过网络传输给讯飞远程识别平台进行解析，解析完成之后，把解析结果传送过来

3.在main方法中先要调用SpeechUtility.createUtility，这是讯飞SDK的初始化，相当于远程连接讯飞识别平台。因为Java现在还不支持离线识别，所以在调用识别方法之前，必须先连接讯飞开发平台，这个方法的作用正是如此。其参数是形如"appid=..."的字符串，代码中的appid通过Version.getAppid()获取。

4.因为很多方法都是讯飞提供的，所以我们需要导入相应的包

具体如下

    import com.iflytek.cloud.speech.RecognizerListener;
    
    import com.iflytek.cloud.speech.RecognizerResult;
    
    import com.iflytek.cloud.speech.SpeechError;
    
    import com.iflytek.cloud.speech.SpeechRecognizer;
    
    import com.iflytek.cloud.speech.SpeechUtility;
    
    import com.iflytek.util.DebugLog;
    
    import com.iflytek.util.JsonParser;//json解析类
    
    import com.iflytek.util.Version;//版本类

这些在SDK 中都有

#### 最终的结果 ####

ps:因为只是注重识别功能，所以界面很丑。。。

![最终截图][1240 1]

[Link 1]: http://www.xfyun.cn/sdk/dispatcher
[1240]: /images/20210920/c24f66e6a92e42029504e815f506c407.png
[1240 1]: /images/20210920/f563b62f662a4fad9077fe93e66ec84e.png