java实现变声器--变声萝莉、大叔、熊孩子

╰半橙微兮° 2022-09-05 12:53 92阅读 0赞

编写java变声器需要做的前期准备

安装 ffmpeg,下载地址 https://github.com/BtbN/FFmpeg-Builds/releases/

win系统下载 ffmpeg-N-103272-g7bba0dd638-win64-gpl.zip

解压后,将其中的 bin 目录添加到系统 PATH 环境变量

20210818103646250.png

新建maven程序加入两个引用

  1. <!-- https://mvnrepository.com/artifact/com.github.st-h/TarsosDSP -->
  2. <dependency>
  3. <groupId>com.github.st-h</groupId>
  4. <artifactId>TarsosDSP</artifactId>
  5. <version>2.4.1</version>
  6. </dependency>
  7. <dependency>
  8. <groupId>ws.schild</groupId>
  9. <artifactId>jave-core</artifactId>
  10. <version>2.4.6</version>
  11. </dependency>

新建两个基础类

AudioOutputToByteArray

  1. import be.tarsos.dsp.AudioEvent;
  2. import be.tarsos.dsp.AudioProcessor;
  3. import java.io.ByteArrayOutputStream;
  4. public class AudioOutputToByteArray implements AudioProcessor {
  5. private boolean isDone = false;
  6. private byte[] out = null;
  7. private ByteArrayOutputStream bos;
  8. public AudioOutputToByteArray() {
  9. bos = new ByteArrayOutputStream();
  10. }
  11. public ByteArrayOutputStream getBos() {
  12. return bos;
  13. }
  14. public byte[] getData() {
  15. while (!isDone && out == null) {
  16. try {
  17. Thread.sleep(10);
  18. } catch (InterruptedException ignored) {}
  19. }
  20. return out;
  21. }
  22. @Override
  23. public boolean process(AudioEvent audioEvent) {
  24. bos.write(audioEvent.getByteBuffer(),0,audioEvent.getByteBuffer().length);
  25. return true;
  26. }
  27. @Override
  28. public void processingFinished() {
  29. out = bos.toByteArray().clone();
  30. bos = null;
  31. isDone = true;
  32. }
  33. }

WaveHeader

  1. import java.io.ByteArrayOutputStream;
  2. import java.io.IOException;
  /**
   * Holder for the fields of a RIFF/WAVE file header, plus serialization of those fields
   * to bytes.
   *
   * <p>Callers populate the public fields and then call {@link #getHeader()}. Integers and
   * shorts are emitted least-significant byte first; the tag arrays are emitted one byte
   * per character. With the default four-character tags the result is 44 bytes long.
   */
  public class WaveHeader {

      public final char[] fileID = {'R', 'I', 'F', 'F'};
      public int fileLength;
      public char[] wavTag = {'W', 'A', 'V', 'E'};
      public char[] FmtHdrID = {'f', 'm', 't', ' '};
      public int FmtHdrLeth;
      public short FormatTag;
      public short Channels;
      public int SamplesPerSec;
      public int AvgBytesPerSec;
      public short BlockAlign;
      public short BitsPerSample;
      public char[] DataHdrID = {'d', 'a', 't', 'a'};
      public int DataHdrLeth;

      /**
       * Serializes the header fields in declaration order.
       *
       * @return the encoded header bytes
       * @throws IOException declared for compatibility with existing callers
       */
      public byte[] getHeader() throws IOException {
          try (ByteArrayOutputStream sink = new ByteArrayOutputStream()) {
              putTag(sink, fileID);
              putInt(sink, fileLength);
              putTag(sink, wavTag);
              putTag(sink, FmtHdrID);
              putInt(sink, FmtHdrLeth);
              putShort(sink, FormatTag);
              putShort(sink, Channels);
              putInt(sink, SamplesPerSec);
              putInt(sink, AvgBytesPerSec);
              putShort(sink, BlockAlign);
              putShort(sink, BitsPerSample);
              putTag(sink, DataHdrID);
              putInt(sink, DataHdrLeth);
              return sink.toByteArray();
          }
      }

      /** Writes the low 16 bits of {@code value}, low byte first. */
      private void putShort(ByteArrayOutputStream sink, int value) {
          sink.write(value & 0xFF);
          sink.write((value >>> 8) & 0xFF);
      }

      /** Writes all 32 bits of {@code value}, low byte first. */
      private void putInt(ByteArrayOutputStream sink, int value) {
          for (int shift = 0; shift < 32; shift += 8) {
              sink.write((value >>> shift) & 0xFF);
          }
      }

      /** Writes each character of {@code tag} as a single byte (its low 8 bits). */
      private void putTag(ByteArrayOutputStream sink, char[] tag) {
          for (char symbol : tag) {
              sink.write(symbol);
          }
      }
  }

现在新建执行类和方法,先在D盘放一个名为1.mp3的文件。以下代码会对1.mp3进行变声,并把结果写入2.mp3(注意:代码实际写入的是WAV头加PCM数据,只是文件名沿用了.mp3后缀)。

byte[] pcmBytes = speechPitchShiftMp3("d://1.mp3", 0.73, 0.73); 其中后面的两个0.73就是变声参数,文末会给出各种变声效果对应的参数。

  1. import java.io.FileOutputStream;
  2. import java.io.IOException;
  3. import java.io.OutputStream;
  4. import javax.sound.sampled.UnsupportedAudioFileException;
  5. import be.tarsos.dsp.AudioDispatcher;
  6. import be.tarsos.dsp.WaveformSimilarityBasedOverlapAdd;
  7. import be.tarsos.dsp.effects.DelayEffect;
  8. import be.tarsos.dsp.io.jvm.AudioDispatcherFactory;
  9. import be.tarsos.dsp.resample.RateTransposer;
  10. public class ceshi {
  11. public static void main(String[] args) throws Exception {
  12. //这里返回的是pcm格式的音频
  13. byte[] pcmBytes = speechPitchShiftMp3("d://1.mp3", 0.73, 0.73);
  14. //如果需要转成wav则需要给pcmBytes增加一个头部信息
  15. //TarsosDSP中也有输出Wav格式音频的处理器,这里没有使用。
  16. byte[] wavHeader = pcm2wav(pcmBytes);
  17. OutputStream wavOutPut = new FileOutputStream("d://2.mp3");
  18. wavOutPut.write(wavHeader);
  19. wavOutPut.write(pcmBytes);
  20. wavOutPut.flush();
  21. wavOutPut.close();
  22. // 对于各种声音类型,以及所需添加的处理器,还有处理器参数代码,将在本文最后给出。
  23. //如果需要转mp3格式的,也可以给我留言,我会加上。
  24. }
  25. /**
  26. * 变声
  27. * @param speedFactor 变速率 (0,2) 大于1为加快语速,小于1为放慢语速
  28. * @param rateFactor 音调变化率 (0,2) 大于1为降低音调(深沉),小于1为提升音调(尖锐)
  29. * @return 变声后的MP3数据输入流
  30. */
  31. public static byte[] speechPitchShiftMp3(String fileUrl, double rateFactor, double speedFactor) throws IOException, UnsupportedAudioFileException {
  32. WaveformSimilarityBasedOverlapAdd w = new WaveformSimilarityBasedOverlapAdd(WaveformSimilarityBasedOverlapAdd.Parameters.speechDefaults(rateFactor, 16000));
  33. int inputBufferSize = w.getInputBufferSize();
  34. int overlap = w.getOverlap();
  35. AudioDispatcher dispatcher = AudioDispatcherFactory.fromPipe(fileUrl,16000,inputBufferSize,overlap);
  36. w.setDispatcher(dispatcher);
  37. dispatcher.addAudioProcessor(w);
  38. /** 采样率转换器。 使用插值更改采样率, 与时间拉伸器一起可用于音高转换。 **/
  39. dispatcher.addAudioProcessor(new RateTransposer(speedFactor));
  40. AudioOutputToByteArray out = new AudioOutputToByteArray();
  41. /** 声音速率转换器 -- 失败 **/
  42. /*SoundTouchRateTransposer soundTouchRateTransposer = new SoundTouchRateTransposer(2);
  43. soundTouchRateTransposer.setDispatcher(dispatcher);
  44. dispatcher.addAudioProcessor(soundTouchRateTransposer);*/
  45. /** 正弦波发生器 -- 无反应 **/
  46. /*SineGenerator sineGenerator = new SineGenerator(0.5, 0.5);
  47. dispatcher.addAudioProcessor(sineGenerator);*/
  48. /** 音调转换器 -- 无效果 **/
  49. // dispatcher.addAudioProcessor(new PitchShifter(0.1,16000,448,overlap));
  50. /** 制粒机使用颗粒合成回放样本。方法可用于控制播放速率,音高,颗粒大小, -- 无效果 **/
  51. // dispatcher.addAudioProcessor(new OptimizedGranulator(16000, 448));
  52. /** 噪音产生器 -- 有效果 **/
  53. // dispatcher.addAudioProcessor(new NoiseGenerator(0.2 ));
  54. /** 增益处理器 增益为1,则无任何反应。 增益大于1表示音量增加a -- 有反应 **/
  55. // dispatcher.addAudioProcessor(new GainProcessor(10));
  56. /**镶边效果 -- 有反应 **/
  57. // dispatcher.addAudioProcessor(new FlangerEffect(64, 0.3, 16000, 16000));// 回声效果
  58. // dispatcher.addAudioProcessor(new FlangerEffect(1 << 4, 0.8, 8000, 2000));// 感冒
  59. // dispatcher.addAudioProcessor(new ZeroCrossingRateProcessor());//感冒
  60. /** 淡出 --声音慢慢变小 **/
  61. // dispatcher.addAudioProcessor(new FadeOut(5));
  62. /** 淡入-- 声音慢慢变大 **/
  63. // dispatcher.addAudioProcessor(new FadeIn(5));
  64. /** 在信号上添加回声效果。echoLength以秒为单位 elay回声的衰减,介于0到1之间的值。1表示无衰减,0表示立即衰减 **/
  65. dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000) );
  66. /** 调幅噪声 -- 将声音转换为噪声**/
  67. // dispatcher.addAudioProcessor(new AmplitudeModulatedNoise());
  68. /** 振幅LFO -- 声音波动 **/
  69. // dispatcher.addAudioProcessor(new AmplitudeLFO());
  70. dispatcher.addAudioProcessor(out);
  71. dispatcher.run();
  72. // return new ByteArrayInputStream(out.getData());
  73. return out.getData();
  74. }
  75. public static byte[] pcm2wav(byte[] bytes) throws IOException {
  76. //填入参数,比特率等等。这里用的是16位单声道 8000 hz
  77. WaveHeader header = new WaveHeader();
  78. //长度字段 = 内容的大小(PCMSize) + 头部字段的大小(不包括前面4字节的标识符RIFF以及fileLength本身的4字节)
  79. header.fileLength = bytes.length + (44 - 8);
  80. header.FmtHdrLeth = 16;
  81. header.BitsPerSample = 16;
  82. header.Channels = 1;
  83. header.FormatTag = 0x0001;
  84. header.SamplesPerSec = 16000;
  85. header.BlockAlign = (short)(header.Channels * header.BitsPerSample / 8);
  86. header.AvgBytesPerSec = header.BlockAlign * header.SamplesPerSec;
  87. header.DataHdrLeth = bytes.length;
  88. byte[] h = header.getHeader();
  89. assert h.length == 44; //WAV标准,头部应该是44字节
  90. return h;
  91. }
  92. }

各种参数类

LUOLI(0.6, 0.6, "萝莉", 1, dispatcher -> {})

byte[] pcmBytes = speechPitchShiftMp3("d://1.mp3", 0.73, 0.73); 改为

byte[] pcmBytes = speechPitchShiftMp3("d://1.mp3", 0.6, 0.6); 就可以了

  1. import be.tarsos.dsp.AudioDispatcher;
  2. import be.tarsos.dsp.WaveformSimilarityBasedOverlapAdd;
  3. import be.tarsos.dsp.ZeroCrossingRateProcessor;
  4. import be.tarsos.dsp.effects.DelayEffect;
  5. import be.tarsos.dsp.io.jvm.AudioDispatcherFactory;
  6. import be.tarsos.dsp.resample.RateTransposer;
  7. import java.io.File;
  8. import java.io.IOException;
  9. import java.util.Optional;
  10. import java.util.function.Consumer;
  11. public enum SoundEnum {
  12. LUOLI(0.6, 0.6, "萝莉", 1, dispatcher -> {}),
  13. DASHU(1.2, 1.2, "大叔", 2, dispatcher -> {}),
  14. FEIZAI(1.5, 1.5, "肥仔", 3, dispatcher -> {}),
  15. GAOGUAI(1.5, 0.8, "搞怪", 4, dispatcher -> {}),
  16. XIONGHAIZI(0.73, 0.73, "熊孩子", 5, dispatcher -> {}),
  17. MANTUNTUN(0.35,1, "慢吞吞",6 , dispatcher -> {}),
  18. WANGHONGNV(1.2,0.7, "网红女",7 , dispatcher -> {}),
  19. /**
  20. * dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000) );
  21. */
  22. KUNSHOU(1.55,1.55, "困兽", 8, dispatcher -> dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000))),
  23. /**
  24. * dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000) );
  25. */
  26. ZHONGJIXIE(1.50,1.50, "重机械", 9, dispatcher -> dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000))),
  27. /**
  28. * dispatcher.addAudioProcessor(new FlangerEffect(1 << 4, 0.8, 8000, 2000));
  29. * dispatcher.addAudioProcessor(new ZeroCrossingRateProcessor());
  30. */
  31. GANMAO(1.05,1.05, "感冒", 10, dispatcher -> {
  32. dispatcher.addAudioProcessor(new DelayEffect(0.2, 0.24, 12000));
  33. dispatcher.addAudioProcessor(new ZeroCrossingRateProcessor());
  34. }),
  35. /**
  36. * dispatcher.addAudioProcessor(new DelayEffect(0.8, 0.5, 12000) );
  37. * dispatcher.addAudioProcessor(new DelayEffect(0.5, 0.3, 8000) );
  38. */
  39. KONGLING(1, 1, "空灵", 11, dispatcher -> {
  40. dispatcher.addAudioProcessor(new DelayEffect(0.8, 0.5, 12000) );
  41. dispatcher.addAudioProcessor(new DelayEffect(0.5, 0.3, 8000) );
  42. });
  43. /**
  44. * @param speedFactor 变速率 (0,2) 大于1为加快语速,小于1为放慢语速
  45. * @param rateFactor 音调变化率 (0,2) 大于1为降低音调(深沉),小于1为提升音调(尖锐)
  46. */
  47. SoundEnum(double rateFactor, double speedFactor, String name, int type, Consumer<AudioDispatcher> consumer){
  48. this.rateFactor = rateFactor;
  49. this.speedFactor = speedFactor;
  50. this.name = name;
  51. this.type = type;
  52. this.consumer = consumer;
  53. }
  54. private double rateFactor;
  55. private double speedFactor;
  56. private String name;
  57. private int type;
  58. private Consumer consumer;
  59. public byte[] run(String fileUrl){
  60. WaveformSimilarityBasedOverlapAdd w = new WaveformSimilarityBasedOverlapAdd(WaveformSimilarityBasedOverlapAdd.Parameters.speechDefaults(rateFactor, 16000));
  61. int inputBufferSize = w.getInputBufferSize();
  62. int overlap = w.getOverlap();
  63. AudioDispatcher dispatcher = AudioDispatcherFactory.fromPipe(fileUrl,16000,inputBufferSize,overlap);
  64. w.setDispatcher(dispatcher);
  65. dispatcher.addAudioProcessor(w);
  66. /** 采样率转换器。 使用插值更改采样率, 与时间拉伸器一起可用于音高转换。 **/
  67. dispatcher.addAudioProcessor(new RateTransposer(speedFactor));
  68. AudioOutputToByteArray out = new AudioOutputToByteArray();
  69. consumer.accept(dispatcher);
  70. dispatcher.addAudioProcessor(out);
  71. dispatcher.run();
  72. return out.getData();
  73. }
  74. public static byte[] pcm2wav(byte[] bytes) {
  75. try {
  76. //填入参数,比特率等等。这里用的是16位单声道 8000 hz
  77. WaveHeader header = new WaveHeader();
  78. //长度字段 = 内容的大小(PCMSize) + 头部字段的大小(不包括前面4字节的标识符RIFF以及fileLength本身的4字节)
  79. header.fileLength = bytes.length + (44 - 8);
  80. header.FmtHdrLeth = 16;
  81. header.BitsPerSample = 16;
  82. header.Channels = 1;
  83. header.FormatTag = 0x0001;
  84. header.SamplesPerSec = 16000;
  85. header.BlockAlign = (short)(header.Channels * header.BitsPerSample / 8);
  86. header.AvgBytesPerSec = header.BlockAlign * header.SamplesPerSec;
  87. header.DataHdrLeth = bytes.length;
  88. byte[] h = header.getHeader();
  89. assert h.length == 44; //WAV标准,头部应该是44字节
  90. return h;
  91. } catch (IOException e) {
  92. //log.error("pcm2wav-error", e);
  93. }
  94. return null;
  95. }
  96. public static Optional<SoundEnum> getInstance(int type){
  97. for (int i = 0; i < SoundEnum.values().length; i++) {
  98. if(SoundEnum.values()[i].type == type)
  99. return Optional.of(SoundEnum.values()[i]);
  100. }
  101. return Optional.empty();
  102. }
  103. }

发表评论

表情:
评论列表 (有 0 条评论,92人围观)

还没有评论,来说两句吧...

相关阅读

    相关 原理:卷积和传递函数

    idea 关于系统 我们将一个空旷的房间类比于一个系统,如果我们在房间内放置一个声源,声音信号经过墙壁,天花板,地面,放置的种种物品的反射,最终会被我们人耳或者声音