feat: git init

This commit is contained in:
2026-01-17 19:18:58 -06:00
commit b73d5b8078
18 changed files with 1274 additions and 0 deletions

162
internal/whisper/client.go Normal file
View File

@@ -0,0 +1,162 @@
package whisper
import (
"bytes"
"encoding/json"
"fmt"
"os/exec"
)
// ModelSize names a Whisper model variant. Its string value is the model
// name that buildPythonCommand hands to whisper.load_model.
type ModelSize string

// Model names accepted by this package; each constant's value is the literal
// name used when loading the model.
const (
	ModelTiny ModelSize = "tiny"
	ModelBase ModelSize = "base"
	ModelSmall ModelSize = "small"
	ModelMedium ModelSize = "medium"
	ModelLarge ModelSize = "large"
	ModelTurbo ModelSize = "turbo"
)
// TranscriptionResult is the JSON document printed by the Whisper helper
// script on stdout; Transcribe unmarshals that output into this struct.
type TranscriptionResult struct {
	// Text is the transcript text reported by Whisper (the "text" key).
	Text string `json:"text"`
	// Segments holds one entry per segment reported by Whisper.
	Segments []Segment `json:"segments"`
	// Language is the language reported by Whisper; the script emits ""
	// when the result carries no "language" key.
	Language string `json:"language"`
	// Duration is the result's "duration" value; the script emits 0.0 when
	// the key is absent. Presumably seconds — TODO confirm.
	Duration float64 `json:"duration"`
}
// Segment is one timed span of the transcription.
type Segment struct {
	// Start is the segment's start timestamp (presumably seconds — TODO confirm).
	Start float64 `json:"start"`
	// End is the segment's end timestamp (presumably seconds — TODO confirm).
	End float64 `json:"end"`
	// Text is the text transcribed for this segment.
	Text string `json:"text"`
	// Words holds per-word timing when the segment carries any; the field
	// is omitted from encoded JSON when empty.
	Words []Word `json:"words,omitempty"`
	// Speaker is an optional speaker label. The script built by
	// buildPythonCommand does not emit it, so it stays empty after
	// Transcribe unless filled in elsewhere.
	Speaker string `json:"speaker,omitempty"`
}
// Word is a single word of a Segment together with its timestamps.
type Word struct {
	// Start is the word's start timestamp (presumably seconds — TODO confirm).
	Start float64 `json:"start"`
	// End is the word's end timestamp (presumably seconds — TODO confirm).
	End float64 `json:"end"`
	// Word is the word text, decoded from the "word" key.
	Word string `json:"word"`
}
// Client runs Whisper transcriptions by shelling out to a Python
// interpreter (see Transcribe).
type Client struct {
	// ModelPath is not read or set by any code visible in this file.
	// NOTE(review): presumably a path to local model weights — confirm or remove.
	ModelPath string
	// ModelSize is the model name passed to whisper.load_model.
	ModelSize ModelSize
}
// NewClient returns a Client that transcribes with the given model size.
// ModelPath is left at its zero value.
func NewClient(modelSize ModelSize) *Client {
	client := new(Client)
	client.ModelSize = modelSize
	return client
}
// Transcribe runs Whisper on the audio file at audioPath and returns the
// decoded transcription.
//
// It starts a `python3 -c <script>` subprocess with the script produced by
// buildPythonCommand, captures stdout and stderr separately, and unmarshals
// stdout as JSON into a TranscriptionResult. A nil options value falls back
// to DefaultTranscriptionOptions.
//
// An error is returned when audioPath is empty, when the subprocess cannot
// be started or exits unsuccessfully (the trimmed stderr is included in the
// message), or when stdout is not valid JSON for a TranscriptionResult.
// Underlying errors are wrapped with %w so callers can inspect them with
// errors.Is and errors.As (for example to detect *exec.ExitError).
func (c *Client) Transcribe(audioPath string, options *TranscriptionOptions) (*TranscriptionResult, error) {
	// Fail fast instead of paying for interpreter start-up and model loading
	// only to have the audio decoder reject an empty path.
	if audioPath == "" {
		return nil, fmt.Errorf("transcription failed: audio path is empty")
	}
	if options == nil {
		options = DefaultTranscriptionOptions()
	}

	cmd := exec.Command("python3", "-c", c.buildPythonCommand(audioPath, options))

	// Keep the two streams apart: stdout must contain nothing but the JSON
	// result, while stderr is only used for diagnostics.
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("transcription failed: %w, stderr: %s", err, bytes.TrimSpace(stderr.Bytes()))
	}

	var result TranscriptionResult
	if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
		return nil, fmt.Errorf("failed to parse transcription output: %w", err)
	}
	return &result, nil
}
// buildPythonCommand renders the Python script that Transcribe passes to
// `python3 -c`.
//
// The script loads the Whisper model named by c.ModelSize, transcribes
// audioPath with the given options while sys.stdout is redirected to
// os.devnull, then restores stdout and prints one JSON object with the keys
// "text", "language", "duration" and "segments".
//
// Every string input (model name, audio path, language) is embedded through
// pyStringLiteral, so quotes, backslashes (e.g. Windows paths) and newlines
// in those values can neither corrupt the script nor inject code into it.
// An empty Language or "auto" is rendered as None. Temperature is rendered
// with full precision, and a non-positive BestOf is rendered as None rather
// than as an invalid candidate count. A nil options is treated as the zero
// TranscriptionOptions.
func (c *Client) buildPythonCommand(audioPath string, options *TranscriptionOptions) string {
	if options == nil {
		options = &TranscriptionOptions{}
	}

	// Go booleans have to be spelled the Python way.
	verbose := "False"
	if options.Verbose {
		verbose = "True"
	}

	language := "None"
	if options.Language != "" && options.Language != "auto" {
		language = pyStringLiteral(options.Language)
	}

	bestOf := "None"
	if options.BestOf > 0 {
		bestOf = fmt.Sprintf("%d", options.BestOf)
	}

	// Top-level Python statements must stay at column 0 inside the raw
	// string. The temperature goes through float("...") so that every value
	// %g can produce (including exponents, NaN and +Inf) parses as a float.
	return fmt.Sprintf(`
import whisper
import json
import sys
import os
import warnings

# Suppress warnings and stdout during transcription
warnings.filterwarnings("ignore")
old_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')

# Load model
model = whisper.load_model(%s)

# Transcribe
result = model.transcribe(%s,
    language=%s,
    verbose=%s,
    temperature=float("%g"),
    best_of=%s)

# Restore stdout for JSON output
sys.stdout = old_stdout

# Output as JSON
print(json.dumps({
    "text": result["text"],
    "language": result.get("language", ""),
    "duration": result.get("duration", 0.0),
    "segments": [{
        "start": seg["start"],
        "end": seg["end"],
        "text": seg["text"],
        "words": seg.get("words", [])
    } for seg in result.get("segments", [])]
}))
`, pyStringLiteral(string(c.ModelSize)), pyStringLiteral(audioPath), language, verbose, options.Temperature, bestOf)
}

// pyStringLiteral returns s as a double-quoted, fully escaped string literal.
// It relies on JSON string encoding, whose escapes (\", \\, \n, \uXXXX, ...)
// are also valid inside a Python string literal.
func pyStringLiteral(s string) string {
	// Marshalling a plain string value cannot fail, so the error is ignored.
	lit, _ := json.Marshal(s)
	return string(lit)
}
// TranscriptionOptions carries the per-call settings that
// buildPythonCommand forwards to Whisper's transcribe call.
type TranscriptionOptions struct {
	// Language is a language code or "auto". Both "" and "auto" are sent
	// to Whisper as language=None.
	Language string
	// Verbose is forwarded as Whisper's verbose argument (True or False).
	// NOTE(review): the script redirects stdout while transcribing, so
	// confirm what output, if any, this still affects.
	Verbose bool
	// Temperature is the sampling temperature, forwarded as Whisper's
	// temperature argument.
	Temperature float64
	// BestOf is the number of candidates when sampling with
	// temperature > 0, forwarded as Whisper's best_of argument.
	BestOf int
}
// DefaultTranscriptionOptions returns a freshly allocated option set with
// the package defaults: "auto" language, non-verbose, temperature 0.0 and
// best-of 5.
func DefaultTranscriptionOptions() *TranscriptionOptions {
	defaults := new(TranscriptionOptions)
	defaults.Language = "auto"
	defaults.Verbose = false
	defaults.Temperature = 0.0
	defaults.BestOf = 5
	return defaults
}