60 lines
1.3 KiB
Go
60 lines
1.3 KiB
Go
package diarization
|
|
|
|
import (
|
|
"transcribe/internal/whisper"
|
|
)
|
|
|
|
// AlignSpeakers maps speaker segments to transcription segments by timestamp overlap
|
|
func AlignSpeakers(transcription *whisper.TranscriptionResult, diarization *DiarizationResult) {
|
|
if diarization == nil || len(diarization.Speakers) == 0 {
|
|
return
|
|
}
|
|
|
|
for i := range transcription.Segments {
|
|
seg := &transcription.Segments[i]
|
|
speaker := findSpeakerForSegment(seg.Start, seg.End, diarization.Speakers)
|
|
seg.Speaker = speaker
|
|
}
|
|
}
|
|
|
|
// findSpeakerForSegment finds the speaker with the most overlap with the given time range
|
|
func findSpeakerForSegment(start, end float64, speakers []SpeakerSegment) string {
|
|
var bestSpeaker string
|
|
var maxOverlap float64
|
|
|
|
for _, spk := range speakers {
|
|
overlap := calculateOverlap(start, end, spk.Start, spk.End)
|
|
if overlap > maxOverlap {
|
|
maxOverlap = overlap
|
|
bestSpeaker = spk.Speaker
|
|
}
|
|
}
|
|
|
|
return bestSpeaker
|
|
}
|
|
|
|
// calculateOverlap returns how long the range start1..end1 and the range
// start2..end2 overlap. It returns 0 when the ranges are disjoint or only
// touch at a boundary.
func calculateOverlap(start1, end1, start2, end2 float64) float64 {
	// The shared span begins at the later of the two starts...
	lo := start2
	if start1 > start2 {
		lo = start1
	}
	// ...and finishes at the earlier of the two ends.
	hi := end2
	if end1 < end2 {
		hi = end1
	}

	if hi > lo {
		return hi - lo
	}
	return 0
}
|
|
|
|
// max returns a when a is strictly greater than b, and b otherwise.
//
// It is declared at package scope, so within this package it takes precedence
// over the predeclared max available since Go 1.21.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
|
|
|
|
// min returns a when a is strictly less than b, and b otherwise.
//
// It is declared at package scope, so within this package it takes precedence
// over the predeclared min available since Go 1.21.
func min(a, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|