Files
transcribe/internal/diarization/align.go
2026-01-17 19:18:58 -06:00

60 lines
1.3 KiB
Go

package diarization
import (
"transcribe/internal/whisper"
)
// AlignSpeakers labels every transcription segment, in place, with the
// speaker whose diarization segment overlaps it the most in time.
//
// The call is a no-op when transcription is nil, when diarization is nil, or
// when diarization contains no speaker segments. Otherwise every segment's
// Speaker field is overwritten; a segment that overlaps no speaker segment
// ends up with an empty Speaker.
func AlignSpeakers(transcription *whisper.TranscriptionResult, diarization *DiarizationResult) {
	// Guard both inputs: a nil transcription would otherwise panic on the
	// Segments access below.
	if transcription == nil || diarization == nil || len(diarization.Speakers) == 0 {
		return
	}
	for i := range transcription.Segments {
		// Take the address so the assignment mutates the slice element
		// rather than a copy of it.
		seg := &transcription.Segments[i]
		seg.Speaker = findSpeakerForSegment(seg.Start, seg.End, diarization.Speakers)
	}
}
// findSpeakerForSegment returns the label of the speaker segment that shares
// the most time with the range [start, end].
//
// Only a strictly larger overlap replaces the current winner, so on a tie the
// entry that appears first in speakers is kept. If no entry has a positive
// overlap with the range, the empty string is returned.
func findSpeakerForSegment(start, end float64, speakers []SpeakerSegment) string {
	winner := ""
	longest := 0.0
	for idx := range speakers {
		shared := calculateOverlap(start, end, speakers[idx].Start, speakers[idx].End)
		if shared > longest {
			longest, winner = shared, speakers[idx].Speaker
		}
	}
	return winner
}
// calculateOverlap returns the length of the intersection of the ranges
// [start1, end1] and [start2, end2].
//
// The result is 0 when the ranges are disjoint or merely touch at an endpoint.
func calculateOverlap(start1, end1, start2, end2 float64) float64 {
	// The intersection begins at the later of the two starts...
	lo := start2
	if start1 > start2 {
		lo = start1
	}
	// ...and finishes at the earlier of the two ends.
	hi := end2
	if end1 < end2 {
		hi = end1
	}
	if hi > lo {
		return hi - lo
	}
	return 0
}
// max returns a when a is strictly greater than b, and b otherwise.
// When the two values compare equal, b is the one returned.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
// min returns a when a is strictly less than b, and b otherwise.
// When the two values compare equal, b is the one returned.
func min(a, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}