优化后的架构如下:

  1. 首先,将音频数据分帧发送给前端,而不是一次性发送所有音频数据。

  2. 修改ModelSTARes结构体,将音频数据的Frame字段改为可变长度的切片。

// ModelSTARes describes a JSON message consisting of a numeric code and a
// data object; the JSON key of every field is fixed by its struct tag.
// NOTE(review): presumably this is the response returned by the
// speech-to-animation model service — confirm against the producer.
type ModelSTARes struct {
	// Code is carried under the "code" key.
	// NOTE(review): the success value is not visible here — confirm.
	Code int `json:"code"`
	// Data is the payload object carried under the "data" key.
	Data struct {
		ContentType    string    `json:"ContentType"`
		Content        string    `json:"content"`
		// VesAnimeResult is carried under the "ves_anime_result" key.
		// NOTE(review): HandelModelData divides int32 frame values by a
		// float32 coefl; presumably Frame and Coefl are those inputs — confirm.
		VesAnimeResult struct {
			Order []string  `json:"Order"`
			Coefl float32   `json:"Coefl"`
			Fps   float64   `json:"fps"`
			// Frame is a variable-length slice of int32 values.
			Frame []int32   `json:"Frame"`
		} `json:"ves_anime_result"`
		// Information holds two timestamp tables. Each row is decoded as an
		// untyped array, so element types must be asserted by the consumer.
		Information struct {
			PhoneTimestamp [][]interface{} `json:"phone_timestamp"`
			WordTimestamp  [][]interface{} `json:"word_timestamp"`
		} `json:"information"`
		Text     []string `json:"text"`
		Desc     string   `json:"desc"`
		// IsLast, IsFinish and IsFirst are boolean markers.
		// NOTE(review): presumably they flag the position of this message
		// within a streamed sequence — confirm exact semantics.
		IsLast   bool     `json:"is_last"`
		IsFinish bool     `json:"is_finish"`
		IsFirst  bool     `json:"is_first"`
	} `json:"data"`
}
  3. 修改HandelModelData函数,将音频数据分帧发送给前端。
// HandelModelData streams frames to the driver endpoint of the stream
// identified by token, pausing 20ms after every UDP packet.
//
// Steps:
//  1. Register a quit channel for token in token2driver. If an earlier
//     request is still registered for the same token, it is signalled to stop.
//  2. Resolve the stream's IP through appconf.StreamInfoApi and dial UDP port
//     11111 on it.
//  3. Call StartBSDriver, send a {"userIp": ip} start message, then send every
//     frame through MakeBSFrame.
//  4. Call StopBSDriver on the way out, whether the frames ran to completion
//     or the request was pre-empted.
//
// Parameters:
//   - frames: one slice of raw values per frame. The first 51 values of each
//     slice are divided by coefl and placed in a 61-element float32 vector;
//     indices 1, 4, 8-11 and 40-45 are always sent as zero. Slices shorter
//     than 51 are zero-padded. frames is not modified.
//   - coefl: divisor applied to every raw value.
//   - token: stream token; also the key of the per-token quit channel.
//   - commonInfo: prefix added to every log line.
//   - seq: passed through to StartBSDriver and StopBSDriver.
//
// Failures are logged and never returned. A failure before the socket is
// open aborts the request; a failed packet write is logged and that packet
// is dropped.
func HandelModelData(frames [][]int32, coefl float32, token string, commonInfo string, seq int) {
	const (
		frameLen   = 61    // length of the vector handed to MakeBSFrame
		coeffCount = 51    // number of leading input values copied per frame
		driverPort = 11111 // UDP port of the driver
		packetGap  = 20 * time.Millisecond
	)

	// Every request owns its own quit channel. Sharing one channel between
	// the old and the new request would let the new request consume its own
	// quit signal, or be stopped when the old request closes the channel.
	sig := make(chan int, 1)
	driverMutex.Lock()
	if prev, ok := token2driver[token]; ok {
		logs.Info("%s, emmit sig flag, to quit last request", commonInfo)
		// Non-blocking: the mutex is held, and a full buffer already means a
		// quit signal is pending for the previous driver.
		select {
		case prev <- 0: // make the previous driver quit
		default:
		}
	}
	token2driver[token] = sig
	driverMutex.Unlock()

	preempted := false
	defer func() {
		driverMutex.Lock()
		// Only unregister if a newer request has not taken over the token.
		if token2driver[token] == sig {
			close(sig)
			delete(token2driver, token)
		}
		driverMutex.Unlock()
		if !preempted {
			logs.Info("%s, finish", commonInfo)
		}
	}()

	api := appconf.StreamInfoApi + "?token=" + token
	status, stream := util.OnGetHttpTimeout(api, 10)
	if status != 200 {
		logs.Error("%s, api = %v, status = %d, result = %s", commonInfo, api, status, string(stream))
		return
	}
	var streamInfoRes protocol.StreamInfoRes
	if err := json.Unmarshal(stream, &streamInfoRes); err != nil {
		logs.Error("%s, err = %v", commonInfo, err)
		return
	}
	if streamInfoRes.Code != 200 {
		logs.Error("%s HandelModelData, err = %v", commonInfo, streamInfoRes)
		return
	}
	sip := streamInfoRes.Data.Ip
	if sip == "" {
		logs.Error("%s, invalid ip", commonInfo)
		return
	}
	// A nil IP would make DialUDP target the local host, so reject strings
	// that do not parse as an IP address.
	ip := net.ParseIP(sip)
	if ip == nil {
		logs.Error("%s, invalid ip, ip = %s", commonInfo, sip)
		return
	}
	socket, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: ip, Port: driverPort})
	if err != nil {
		logs.Error("%s, ip = %s, port = %d, err = %v", commonInfo, sip, driverPort, err)
		return
	}
	defer socket.Close()

	// The results of StartBSDriver/StopBSDriver are discarded on purpose:
	// streaming is attempted regardless, as in the original flow.
	_ = StartBSDriver(token, seq)
	defer func() { _ = StopBSDriver(token, seq) }()

	logs.Info("%s, sip = %s, sport = %d", commonInfo, sip, driverPort)

	startData, err := json.Marshal(map[string]interface{}{"userIp": sip})
	if err != nil {
		logs.Error("%s, ip = %s, port = %d, err = %v", commonInfo, sip, driverPort, err)
		return
	}
	if err := writeAllUDP(socket, startData); err != nil {
		logs.Error("%s, ip = %s, port = %d, err = %v", commonInfo, sip, driverPort, err)
	}
	logs.Info("%s, ip = %s, port = %d, OnAudioDriver", commonInfo, sip, driverPort)

	for i, raw := range frames {
		// Check for pre-emption once per frame, before sending its packets.
		select {
		case <-sig:
			logs.Info("%s, recv quit sig, quit now", commonInfo)
			preempted = true
			return
		default:
		}

		limit := len(raw)
		if limit > coeffCount {
			limit = coeffCount
		}
		if limit < coeffCount {
			logs.Error("%s, frame %d has %d values, want at least %d; padding with zeros", commonInfo, i, limit, coeffCount)
		}
		frame := make([]float32, frameLen)
		for j := 0; j < limit; j++ {
			switch j {
			case 1, 4, 8, 9, 10, 11, 40, 41, 42, 43, 44, 45:
				// These indices are always sent as zero.
				continue
			}
			frame[j] = float32(raw[j]) / coefl
		}

		for _, pkt := range MakeBSFrame(frame) {
			if err := writeAllUDP(socket, pkt); err != nil {
				logs.Error("%s, ip = %s, port = %d, frame = %d, err = %v", commonInfo, sip, driverPort, i, err)
			}
			time.Sleep(packetGap)
		}
	}
}

// writeAllUDP writes payload to conn, continuing after short writes. It
// returns at the first write error or when a write makes no progress, so a
// failing socket can never cause an endless retry loop.
func writeAllUDP(conn *net.UDPConn, payload []byte) error {
	for sent := 0; sent < len(payload); {
		n, err := conn.Write(payload[sent:])
		if err != nil {
			return fmt.Errorf("write %d/%d bytes: %w", sent+n, len(payload), err)
		}
		if n <= 0 {
			return fmt.Errorf("write made no progress at %d/%d bytes", sent, len(payload))
		}
		sent += n
	}
	return nil
}

这样,前端就可以根据音频参数逐帧接收音频数据,而无需等待全部数据传输完成。

请将下面的代码重新架构,优化为流式传输音频:根据音频参数逐帧发送数据,而不是一次性完整地传输给前端。下面是代码:type ModelSTARes struct { Code int `json:"code"`; Data struct { ContentType string `json:"ContentType"`; Content string `json:"content"`; VesAnimeResult struct ……

原文地址: https://www.cveoy.top/t/topic/ic5y 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录