Skip to content

Commit 71428b1

Browse files
committed
feat:采用新的断句逻辑
1 parent 9952798 commit 71428b1

File tree

4 files changed

+161
-61
lines changed

4 files changed

+161
-61
lines changed

internal/app/srt_generate.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (s *SrtGenerator) Do(ctx context.Context, task *GenerateTask, doneCallBack
4949
err error
5050
)
5151
defer doneCallBack(s)
52-
logrus.Infof("get generate task %s",task)
52+
logrus.Infof("get generate task %s", task)
5353
//前置检查
5454
absVideo, err = filepath.Abs(task.SrcFile)
5555
if err != nil {
@@ -75,7 +75,7 @@ func (s *SrtGenerator) Do(ctx context.Context, task *GenerateTask, doneCallBack
7575
task.DstFile = wsrtDstFilePath
7676

7777
task.State = TaskStateDoing
78-
logrus.Infof("start extract audio [%s]",task)
78+
logrus.Infof("start extract audio [%s]", task)
7979
//1. 抽取音频
8080
task.Step = GenerateStepAudio
8181
extractor := voice.NewExtractor(strconv.Itoa(int(conf.G_Config.SampleRate)), conf.G_Config.FFmpeg)
@@ -115,11 +115,11 @@ func (s *SrtGenerator) Do(ctx context.Context, task *GenerateTask, doneCallBack
115115
return
116116
}
117117
defer os.Remove(dstAudioFile)
118-
logrus.Infof("end extract audio %s",task)
118+
logrus.Infof("end extract audio %s", task)
119119

120120
//2. 存储
121121
task.Step = GenerateStepOss
122-
logrus.Infof("start upload file [%s]",task)
122+
logrus.Infof("start upload file [%s]", task)
123123
uri, objName, err = s.storage.UploadFile(dstAudioFile)
124124
if err != nil {
125125
task.Failed(err)
@@ -128,18 +128,18 @@ func (s *SrtGenerator) Do(ctx context.Context, task *GenerateTask, doneCallBack
128128
return
129129
}
130130
defer s.storage.DeleteFile(objName)
131-
logrus.Infof("end upload file [%s]",task)
131+
logrus.Infof("end upload file [%s]", task)
132132
//3. 识别
133133
task.Step = GenerateStepRecognize
134-
logrus.Infof("start recognize [%s]",task)
134+
logrus.Infof("start recognize [%s]", task)
135135
sret, wret, err = s.speech.Recognize(ctx, uri)
136136
if err != nil {
137137
task.Failed(err)
138138
task.State = TaskStateFailed
139139
logrus.Errorf("task[%s] recognize failed [%v]", task, err)
140140
return
141141
}
142-
logrus.Infof("end recognize [%s] result sret [%d], wret[%d]",task,len(sret), len(wret))
142+
logrus.Infof("end recognize [%s] result sret [%d], wret[%d]", task, len(sret), len(wret))
143143
//4. 输出
144144
task.Step = GenerateStepGenerateSrt
145145
err = srt.WriteSrt(wsrtDstFilePath, wret)
@@ -156,6 +156,6 @@ func (s *SrtGenerator) Do(ctx context.Context, task *GenerateTask, doneCallBack
156156
logrus.Errorf("task[%s] write srt failed [%v]", task, err)
157157
return
158158
}
159-
logrus.Infof("write srt end [%s]",task)
159+
logrus.Infof("write srt end [%s]", task)
160160
task.State = TaskStateDone
161161
}

internal/text/aliyun/aliyun.go

Lines changed: 148 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (s *Speech) Recognize(ctx context.Context, fileUri string) (sRet []*srt.Srt
5757
if err != nil {
5858
return
5959
}
60-
logrus.Infof("aliyun speech add task %s",fileUri)
60+
logrus.Infof("aliyun speech add task %s", fileUri)
6161
taskId, err = s.sendTask(client, fileUri)
6262
if err != nil {
6363
return
@@ -71,12 +71,12 @@ func (s *Speech) Recognize(ctx context.Context, fileUri string) (sRet []*srt.Srt
7171
err = errors.WithMessage(err, "recognize failed")
7272
return
7373
}
74-
logrus.Infof("aliyun speech recognize result [%d]",len(rsp.Result.Sentences))
74+
logrus.Infof("aliyun speech recognize result [%d]", len(rsp.Result.Sentences))
7575
sRet, err = s.Sentence(0, rsp)
76-
wRet, err = s.BreakSentence(0, rsp)
76+
wRet, err = s.NewBreakSentence(0, rsp)
7777
if err != nil {
7878
data, _ := json.Marshal(rsp)
79-
logrus.Errorf("break sentence failed [%+v]",err)
79+
logrus.Errorf("break sentence failed [%+v]", err)
8080
_ = ioutil.WriteFile(fmt.Sprintf("log/dump_%d.json", time.Now().Unix()), data, os.ModePerm)
8181
err = nil
8282
}
@@ -165,48 +165,8 @@ func (s *Speech) sendTask(client *sdk.Client, URI string) (taskId string, err er
165165
}
166166

167167
func (s *Speech) BreakSentence(channelId int, rsp *Response) (ret []*srt.Srt, err error) {
168-
var (
169-
newLine bool
170-
idx int
171-
)
172-
//1. 重新断句
173-
idx = 0
174-
newLine = true
175-
tmpSrt := &srt.Srt{}
176-
for _, sentence := range rsp.Result.Sentences {
177-
//不是目标通道就过掉
178-
if sentence.ChannelId != channelId {
179-
continue
180-
}
181-
//1.1 按照空格切词
182-
words := strings.Split(sentence.Text, " ")
183-
//1.2 断句
184-
for _, word := range words {
185-
word = strings.TrimSpace(word)
186-
if word == "" {
187-
continue
188-
}
189-
//句子结尾
190-
if strings.ContainsAny(word, ",.?!,。?!") {
191-
tmpSrt.Subtitle += " " + word
192-
newLine = true
193-
continue
194-
}
195-
//新句子开头
196-
if newLine == true {
197-
idx += 1
198-
tmpSrt = &srt.Srt{
199-
Sequence: idx,
200-
Subtitle: word,
201-
}
202-
ret = append(ret, tmpSrt)
203-
newLine = false
204-
} else { //句子中间
205-
tmpSrt.Subtitle += " " + word
206-
}
207-
}
208-
}
209168

169+
ret = s.SplitSentence(channelId, rsp)
210170
re, _ := regexp.Compile(regexNumber)
211171
curIdx := 0
212172
for _, itr := range ret {
@@ -218,9 +178,9 @@ func (s *Speech) BreakSentence(channelId int, rsp *Response) (ret []*srt.Srt, er
218178
}
219179

220180
sword = strings.ToLower(strings.TrimFunc(strings.TrimSpace(sword), func(r rune) bool {
221-
if strings.ContainsRune(text.SentenceBreak,r){
181+
if strings.ContainsRune(text.SentenceBreak, r) {
222182
return true
223-
}else{
183+
} else {
224184
return false
225185
}
226186
return false
@@ -310,3 +270,144 @@ func (s *Speech) Sentence(channelId int, rsp *Response) (ret []*srt.Srt, err err
310270

311271
return
312272
}
273+
274+
// SplitSentence regroups the recognized text of one channel into subtitle
// lines. It walks rsp.Result.Sentences, ignores sentences whose ChannelId
// differs from channelId, splits each sentence's Text on single spaces and
// starts a new *srt.Srt (Sequence numbered from 1) after every word that
// contains one of the listed punctuation characters. Only Sequence and
// Subtitle are filled in here. The line-building state is kept across
// sentences, so a line may continue into the next sentence. ret stays nil
// when no line is started.
// NOTE(review): a punctuated word seen before any line has been started is
// appended to the initial placeholder Srt, which is never added to ret.
func (s *Speech) SplitSentence(channelId int, rsp *Response) (ret []*srt.Srt) {
275+
var (
276+
newLine bool
277+
idx int
278+
)
279+
//1. Re-split the recognized text into subtitle lines
280+
idx = 0
281+
newLine = true
282+
tmpSrt := &srt.Srt{}
283+
for _, sentence := range rsp.Result.Sentences {
284+
// Skip sentences that are not on the requested channel
285+
if sentence.ChannelId != channelId {
286+
continue
287+
}
288+
//1.1 Split the sentence text into words on single spaces
289+
words := strings.Split(sentence.Text, " ")
290+
//1.2 Group the words into subtitle lines
291+
for _, word := range words {
292+
word = strings.TrimSpace(word)
293+
if word == "" {
294+
continue
295+
}
296+
// Word carries punctuation: append it and end the current line
297+
if strings.ContainsAny(word, ",.?!,。?!") {
298+
tmpSrt.Subtitle += " " + word
299+
newLine = true
300+
continue
301+
}
302+
// First word of a new line: start a new Srt entry
303+
if newLine == true {
304+
idx += 1
305+
tmpSrt = &srt.Srt{
306+
Sequence: idx,
307+
Subtitle: word,
308+
}
309+
ret = append(ret, tmpSrt)
310+
newLine = false
311+
} else { // Word in the middle of a line
312+
tmpSrt.Subtitle += " " + word
313+
}
314+
}
315+
}
316+
317+
return
318+
}
319+
320+
// NewBreakSentence builds subtitle lines with SplitSentence and then fills in
// their Start/End fields by matching each word of a line, in order, against
// rsp.Result.Words on the same channel. curIdx is the cursor into
// rsp.Result.Words and only advances past a word once s.Equal reports a match.
// Words that cannot be matched directly are stashed: word results found in
// s.wellKnownNumber go onto wStack, and sentence words matching regexNumber
// go onto swStack; both stacks are cleared on the next successful match.
// err is never assigned in this function, so it is always returned as nil.
// NOTE(review): the error from regexp.Compile is discarded; if regexNumber
// failed to compile, re would be nil — confirm the pattern is always valid.
// NOTE(review): the fmt.Printf below writes one line to stdout per comparison
// and looks like leftover debug output — confirm before shipping.
// NOTE(review): for a one-word line only the swIdx == 0 branch runs, so End
// is not set from that word; it is only set later when a following line's
// first word matches while swStack is non-empty.
// NOTE(review): wStack[len(wStack)-1] is read after checking only
// len(swStack) > 0; wStack appears able to be empty at that point — confirm.
func (s *Speech) NewBreakSentence(channelId int, rsp *Response) (ret []*srt.Srt, err error) {
321+
var (
322+
curIdx = 0
323+
)
324+
325+
ret = s.SplitSentence(channelId, rsp)
326+
re, _ := regexp.Compile(regexNumber)
327+
swStack := []*srt.Srt{}
328+
wStack := []*srt.Srt{}
329+
330+
for sIdx, itr := range ret {
331+
sentenceWords := strings.Split(itr.Subtitle, " ")
332+
for swIdx, sw := range sentenceWords { // each word of the subtitle line
333+
334+
// Normalize the line's word: strip sentence-break characters and surrounding space, then lower-case
335+
sword := sw
336+
if strings.ContainsAny(sw, text.SentenceBreak) {
337+
sword = strings.TrimRight(sword, text.SentenceBreak)
338+
}
339+
340+
sword = strings.ToLower(strings.TrimFunc(strings.TrimSpace(sword), func(r rune) bool {
341+
if strings.ContainsRune(text.SentenceBreak, r) {
342+
return true
343+
} else {
344+
return false
345+
}
346+
return false
347+
}))
348+
349+
for wIdx := curIdx; wIdx < len(rsp.Result.Words); wIdx++ {
350+
// Skip word results from other channels; curIdx itself is only advanced on a match below
351+
if rsp.Result.Words[wIdx].ChannelId != channelId {
352+
continue
353+
}
354+
word := strings.ToLower(strings.TrimSpace(rsp.Result.Words[wIdx].Word))
355+
356+
fmt.Printf("sw:%s , w: %s info: %+v\n", sword, word, rsp.Result.Words[wIdx])
357+
if s.Equal(sword, word) {
358+
359+
// Matched the first word of the line: take its begin time as the line start
360+
if swIdx == 0 {
361+
itr.Start = utils.MillisDurationConv(rsp.Result.Words[wIdx].BeginTime)
362+
// Matched the last word of the line: take its end time as the line end
363+
} else if swIdx == len(sentenceWords)-1 {
364+
itr.End = utils.MillisDurationConv(rsp.Result.Words[wIdx].EndTime)
365+
}
366+
367+
// Check whether any sentence words are still stashed
368+
if len(swStack) > 0 {
369+
// The previous line was left unfinished: close it at this word's begin time
370+
if swIdx == 0 {
371+
ret[sIdx-1].End = utils.MillisDurationConv(rsp.Result.Words[wIdx].BeginTime)
372+
}
373+
// The current line's first word was stashed, so Start is still empty
374+
if itr.Start == "" {
375+
// Original note says "take the first stashed word"; the code reads the LAST element of wStack
376+
itr.Start = wStack[len(wStack)-1].Start
377+
}
378+
swStack = swStack[:0]
379+
wStack = wStack[:0]
380+
}
381+
382+
curIdx = wIdx + 1
383+
break // move on to pairing the next word of the line
384+
} else {
385+
386+
// Word result that has to be stashed: it is a key of s.wellKnownNumber
387+
if _, ok := s.wellKnownNumber[word]; ok {
388+
wStack = append(wStack, &srt.Srt{
389+
Sequence: 0,
390+
Start: utils.MillisDurationConv(rsp.Result.Words[wIdx].BeginTime),
391+
End: utils.MillisDurationConv(rsp.Result.Words[wIdx].EndTime),
392+
Subtitle: word,
393+
})
394+
continue
395+
}
396+
397+
// Sentence word that has to be stashed: it matches regexNumber
398+
if re.Match([]byte(sword)) {
399+
swStack = append(swStack, &srt.Srt{
400+
Sequence: 0,
401+
Start: "",
402+
End: "",
403+
Subtitle: sword,
404+
})
405+
break
406+
}
407+
}
408+
}
409+
}
410+
}
411+
412+
return
413+
}

internal/text/aliyun/aliyun_test.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package aliyun
33
import (
44
"encoding/json"
55
"fmt"
6-
"io/ioutil"
76
"testing"
87
)
98

@@ -59,12 +58,12 @@ const (
5958
)
6059

6160
func TestSpeech_BreakSentence(t *testing.T) {
62-
casex, err := ioutil.ReadFile("/Users/apple/go/src/github.com/JerryZhou343/ClosedCaption/bin/log/dump_1591410628.json")
61+
//casex, err := ioutil.ReadFile("/Users/apple/go/src/github.com/JerryZhou343/ClosedCaption/bin/log/dump_1591410628.json")
6362
rsp := &Response{}
64-
json.Unmarshal([]byte(casex), rsp)
63+
json.Unmarshal([]byte(case1), rsp)
6564
sp := NewSpeech("", "", "",
6665
wellKnownNumber, wellKnownWord)
67-
ret, err := sp.(*Speech).BreakSentence(0, rsp)
66+
ret, err := sp.(*Speech).NewBreakSentence(0, rsp)
6867
if err != nil {
6968
t.Errorf("%+v", err)
7069
return

main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ import (
99

1010
var (
1111
Major = 1
12-
Minor = 1
13-
Patch = 5
12+
Minor = 2
13+
Patch = 0
1414
)
1515

1616
func main() {

0 commit comments

Comments
 (0)