feat: git init

This commit is contained in:
2026-01-17 19:18:58 -06:00
commit b73d5b8078
18 changed files with 1274 additions and 0 deletions

172
cmd/root.go Normal file
View File

@@ -0,0 +1,172 @@
package cmd
import (
"fmt"
"os"
"transcribe/internal/diarization"
"transcribe/internal/whisper"
"transcribe/pkg/audio"
"transcribe/pkg/output"
"transcribe/pkg/progress"
"github.com/spf13/cobra"
)
// Version is the version string reported by the root command. It defaults to
// "dev".
var Version = "dev"

// Values bound to the root command's command-line flags.
var (
	// outputFile is the path given with --output/-o.
	outputFile string
	// outputFormat is the format name given with --format/-f.
	outputFormat string
	// diarize is true when --diarize is passed.
	diarize bool
	// numSpeakers is the value given with --speakers/-s.
	numSpeakers int
	// modelSize is the Whisper model name given with --model/-m.
	modelSize string
	// noWrite is true when --no-write is passed.
	noWrite bool
)
// rootCmd represents the base command when called without any subcommands.
//
// Its Run handler validates the arguments, then for every input file:
// transcribes it, optionally runs speaker diarization and aligns the speakers
// onto the transcript, formats the result, and either writes it to the -o file
// or prints it to stdout.
//
// Exit status: the process exits with status 1 when the arguments are invalid
// or when transcribing, formatting, or writing fails for any input file. A
// diarization failure is reported but is not fatal; the transcript is still
// emitted without speaker alignment.
//
// Diagnostics are written to stderr so that they never mix with a transcript
// printed to stdout.
var rootCmd = &cobra.Command{
	Use:   "transcribe",
	Short: "A CLI tool for transcribing audio files with speaker diarization",
	Long: `Transcribe is a command-line tool that uses OpenAI's Whisper model to
transcribe audio files. It supports multiple output formats (text, SRT, JSON)
and speaker diarization using voice embeddings.
Output file (-o) is required unless --no-write is specified.
Output Formats:
srt SRT subtitle format (default)
text Plain text with timestamps
json JSON with full metadata
Whisper Models (--model, -m):
tiny Fastest, least accurate (default)
base Fast, basic accuracy
small Balanced speed/accuracy
medium Good accuracy, slower
large Best accuracy, slowest
turbo Optimized for speed
Examples:
# Basic transcription to SRT
transcribe audio.mp3 -o output.srt
# Use a larger model
transcribe audio.mp3 --model small -o output.srt
# Output as plain text
transcribe audio.mp3 --format text -o output.txt
# Enable speaker diarization
transcribe audio.mp3 --diarize -o output.srt
# Print to stdout instead of file
transcribe audio.mp3 --no-write
# Full example: diarization + specific model
transcribe audio.mp3 --model small --diarize -s 2 -o output.srt`,
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) == 0 {
			fmt.Fprintln(os.Stderr, "Please provide audio files to transcribe")
			_ = cmd.Help() // best effort: we are exiting with an error either way
			os.Exit(1)
		}

		// Require output file unless --no-write is set.
		if outputFile == "" && !noWrite {
			fmt.Fprintln(os.Stderr, "✗ Error: Output file required. Use -o <file> to specify output, or --no-write to print to stdout.")
			os.Exit(1)
		}

		// --no-write takes precedence over -o: nothing is written to disk when
		// it is set, even if an output path was also supplied.
		writeToFile := outputFile != "" && !noWrite

		// A single output path cannot hold several transcripts: every input
		// would overwrite the previous result and only the last would survive.
		if writeToFile && len(args) > 1 {
			fmt.Fprintf(os.Stderr, "✗ Error: %d input files given but only one output file '%s'; each result would overwrite the previous one. Transcribe one file at a time, or use --no-write.\n", len(args), outputFile)
			os.Exit(1)
		}

		// Validate all provided files before doing any work.
		for _, file := range args {
			_, statErr := os.Stat(file)
			switch {
			case os.IsNotExist(statErr):
				fmt.Fprintf(os.Stderr, "✗ Error: File '%s' does not exist\n", file)
				os.Exit(1)
			case statErr != nil:
				// e.g. permission problems; previously ignored silently.
				fmt.Fprintf(os.Stderr, "✗ Error: Cannot access file '%s': %v\n", file, statErr)
				os.Exit(1)
			}
			if _, err := audio.NewAudioFile(file); err != nil {
				fmt.Fprintf(os.Stderr, "✗ Error: File '%s' has unsupported format or error: %v\n", file, err)
				os.Exit(1)
			}
		}

		// Create whisper client and transcription options.
		whisperClient := whisper.NewClient(whisper.ModelSize(modelSize))
		whisperOptions := whisper.DefaultTranscriptionOptions()

		// Create diarization client if needed.
		var diarizationClient *diarization.Client
		var diarizationOptions *diarization.DiarizationOptions
		if diarize {
			diarizationClient = diarization.NewClient()
			diarizationOptions = &diarization.DiarizationOptions{
				NumSpeakers: numSpeakers,
			}
		}

		// Create output formatter.
		formatter := output.NewFormatter(output.FormatType(outputFormat))

		// failed records whether any file could not be fully processed so the
		// exit status reflects it after the remaining files have been tried.
		failed := false

		for _, file := range args {
			// Transcription with spinner.
			spinner := progress.NewSpinner(fmt.Sprintf("Transcribing %s (model: %s)...", file, modelSize))
			spinner.Start()
			result, err := whisperClient.Transcribe(file, whisperOptions)
			if err != nil {
				spinner.StopWithMessage(fmt.Sprintf("✗ Error transcribing %s: %v", file, err))
				failed = true
				continue
			}
			spinner.StopWithMessage(fmt.Sprintf("✓ Transcribed %s (%.1fs audio)", file, result.Duration))

			// Run diarization if enabled. A failure here is not fatal: the
			// transcript is still formatted, just without speaker alignment.
			if diarize {
				speakerSpinner := progress.NewSpinner("Detecting speakers...")
				speakerSpinner.Start()
				diarizationResult, diarizeErr := diarizationClient.Diarize(file, diarizationOptions)
				if diarizeErr != nil {
					speakerSpinner.StopWithMessage(fmt.Sprintf("✗ Diarization failed: %v", diarizeErr))
				} else {
					speakerSpinner.StopWithMessage(fmt.Sprintf("✓ Detected %d speaker(s)", diarizationResult.NumSpeakers))
					diarization.AlignSpeakers(result, diarizationResult)
				}
			}

			// Format output.
			formattedOutput, err := formatter.Format(result)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err)
				failed = true
				continue
			}

			// Print to stdout when no file is to be written.
			if !writeToFile {
				fmt.Printf("\n%s\n", formattedOutput)
				continue
			}

			if err := os.WriteFile(outputFile, []byte(formattedOutput), 0644); err != nil {
				fmt.Fprintf(os.Stderr, "✗ Error writing output file: %v\n", err)
				failed = true
				continue
			}
			fmt.Printf("✓ Saved to %s\n", outputFile)
		}

		if failed {
			os.Exit(1)
		}
	},
}
// init sets the root command's version and registers its persistent flags,
// binding each one to the corresponding package-level variable.
func init() {
	rootCmd.Version = Version

	// PersistentFlags returns the command's flag set; bind it once and reuse it.
	flags := rootCmd.PersistentFlags()

	// Output destination and format.
	flags.StringVarP(&outputFile, "output", "o", "", "Output file path (required)")
	flags.StringVarP(&outputFormat, "format", "f", "srt", "Output format: text, srt, json")

	// Speaker diarization.
	flags.BoolVar(&diarize, "diarize", false, "Enable speaker diarization")
	flags.IntVarP(&numSpeakers, "speakers", "s", 0, "Number of speakers (0 = auto-detect)")

	// Model selection and stdout mode.
	flags.StringVarP(&modelSize, "model", "m", "tiny", "Whisper model: tiny, base, small, medium, large, turbo")
	flags.BoolVar(&noWrite, "no-write", false, "Print output to stdout instead of file")
}
// Execute runs the root command and terminates the process with exit status 1
// if it returns an error.
//
// The error is deliberately not printed here: rootCmd does not set
// SilenceErrors, so cobra has already reported it by the time Execute returns,
// and printing it again would show the same message twice.
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}