【edge-tts】用web.SocketSharp实现微软文本转语音

小肥羊 11月前 1378 v 1.0 2024-04-27

方法来自python的edge-tts,做了一下解析。

开始以为 web.SocketSharp 有 bug,跑去找校长反馈,结果意料之中,被校长教育了:是我自己没分清楚 \n 和 \r\n。

下面是一个实现的范例,仅仅实现了功能,如果需要封装,还需要自己去改改看。

Code AardioLine:77复制
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63.
  • 64.
  • 65.
  • 66.
  • 67.
  • 68.
  • 69.
  • 70.
  • 71.
  • 72.
  • 73.
  • 74.
  • 75.
  • 76.
  • 77.
    • import win.ui;
    • // Main window: a "演示" button that starts the demo and a multiline edit box used as a log.
    • /*DSG{{*/
    • var winform = win.form(text="aardio form";right=759;bottom=469)
    • winform.add(
    • button={cls="button";text="演示";left=255;top=355;right=511;bottom=454;z=1};
    • edit={cls="edit";left=18;top=21;right=706;bottom=326;edge=1;multiline=1;z=2}
    • )
    • /*}}*/
    • import console;
    • import web.SocketSharp;
    • // WebSocket endpoint of the read-aloud speech synthesis service (client token is hard-coded in the query string).
    • wssurl = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4"
    • // First message sent after connecting (Path:speech.config): header lines separated by \r\n,
    • // an empty line, then a JSON body that requests word-boundary metadata and
    • // "audio-24khz-48kbitrate-mono-mp3" output. The X-Timestamp value is a fixed sample, not the current time.
    • sconfig = 'X-Timestamp:Mon Apr 22 2024 05:54:13 GMT+0000 (Coordinated Universal Time)\r\n
    • Content-Type:application/json; charset=utf-8\r\n
    • Path:speech.config\r\n
    • \r\n
    • {"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}\r\n
    • ';
    • // Second message (Path:ssml): same header layout followed by the SSML document.
    • // The X-RequestId and X-Timestamp are fixed sample values; the voice, pitch, rate and volume
    • // are set in the markup, and the text inside <prosody> is what gets spoken.
    • ssml = 'X-RequestId:1e7580ccbdbb460a8b79ae0d7b7394be\r\n
    • Content-Type:application/ssml+xml\r\n
    • X-Timestamp:Mon Apr 22 2024 05:54:13 GMT+0000 (Coordinated Universal Time)Z\r\n
    • Path:ssml\r\n
    • \r\n
    • <speak version=\'1.0\' xmlns=\'http://www.w3.org/2001/10/synthesis\' xml:lang=\'en-US\'><voice name=\'Microsoft Server Speech Text to Speech Voice (zh-CN, YunxiNeural)\'><prosody pitch=\'+0Hz\' rate=\'-4%\' volume=\'+0%\'>这里输入要朗读的文本</prosody></voice></speak>
    • ';
    • // Create the WebSocket client for the synthesis endpoint; nothing is sent until Connect() is called.
    • ws = web.SocketSharp.WebSocket(wssurl)
    • 
    • // Connection established: log it, then send the configuration message followed by the SSML request.
    • ws.OnOpen = function(source, args) {
    •     winform.edit.print("连接已打开");
    •     ws.Send(sconfig);
    •     ws.Send(ssml);
    • }
    • 
    • // Connection closed: log the close reason reported by the event arguments.
    • ws.OnClose = function(source, args) {
    •     winform.edit.print("已关闭连接", args.Reason);
    • }
    • 
    • // Connection error: log the error message.
    • ws.OnError = function(source, args) {
    •     winform.edit.print(args.Message);
    • }
    • // Buffer that accumulates the audio bytes of every binary audio frame of the current request.
    • var data = "";
    • // Output file written once the service signals the end of the turn.
    • var savepath = "e:\test-tts.mp3"
    • 
    • // Handles every message from the service:
    • //  - binary frames whose header says Content-Type:audio/mpeg and Path:audio carry audio bytes,
    • //    which are appended to `data`;
    • //  - a message whose third header line is Path:turn.end ends the request: the connection is
    • //    closed and the accumulated audio is saved to `savepath`.
    • ws.OnMessage = function(sender, e) {
    •     var tmpstr = e.Data;
    •     if (e.IsBinary) {
    •         tmpstr = raw.tostring(e.RawData)
    •     }
    • 
    •     // Splitting on \r\n is only used to inspect the header lines (and for the log line below).
    •     var tmplist = string.splitEx(tmpstr, '\r\n');
    •     if (#tmplist >= 4) {
    •         if (tmplist[2] == "Content-Type:audio/mpeg" && tmplist[4] == "Path:audio") {
    •             // The audio payload is everything after the "Path:audio\r\n" header terminator.
    •             // It must NOT be taken from the split list: MP3 bytes may themselves contain
    •             // \r\n, in which case tmplist[5] would only hold the first fragment of the chunk.
    •             // The leading "@" makes string.find do a plain-text search instead of a pattern match.
    •             var i, j = string.find(tmpstr, '@Path:audio\r\n');
    •             if (j) {
    •                 var chunk = string.slice(tmpstr, j + 1);
    •                 // The frame may carry no audio bytes at all; skip it in that case.
    •                 if (chunk && #chunk) {
    •                     data = data ++ chunk;
    •                 }
    •             }
    •         }
    •         if (tmplist[3] == "Path:turn.end") {
    •             ws.CloseAsync();
    •             var res = ..string.save(savepath, data)
    •             ..console.dump("保存文件:",res,savepath)
    •             winform.edit.print(savepath);
    •             // Start from an empty buffer so a later request does not append to this audio.
    •             data = "";
    •         }
    •     }
    •     ..console.dump("收到服务端消息:", "二进制数据:", (e.IsBinary), "文本数据:", e.IsText, "段落数:"++#tmplist)
    • }
    • // Clicking the demo button opens the WebSocket connection; the OnOpen handler then sends the request.
    • winform.button.oncommand = function(ctrlId, notifyCode) {
    •     ws.Connect()
    • }
    • 
    • // Show the window and run the message loop until the form is closed.
    • winform.show();
    • win.loopMessage();
    • return winform;
    最新回复 (3)
    • 光庆 11月前
      0 2
      送你两个字:哈哈哈哈
    • grok 11月前
      0 3
      那是 4 个字。。。。
    • grok 11月前
      0 4
      还找校长,喔喔喔
    返回