feat: git init

2026-01-17 19:18:58 -06:00
commit b73d5b8078
18 changed files with 1274 additions and 0 deletions
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -0,0 +1,172 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"transcribe/internal/diarization"
+	"transcribe/internal/whisper"
+	"transcribe/pkg/audio"
+	"transcribe/pkg/output"
+	"transcribe/pkg/progress"
+
+	"github.com/spf13/cobra"
+)
+
+// Version is the version string reported by the root command; it defaults to "dev".
+var Version = "dev"
+
+// Values bound to the root command's command-line flags.
+var (
+	outputFile   string // --output / -o
+	outputFormat string // --format / -f
+	diarize      bool   // --diarize
+	numSpeakers  int    // --speakers / -s
+	modelSize    string // --model / -m
+	noWrite      bool   // --no-write
+)
+
+// rootCmd represents the base command when called without any subcommands.
+//
+// Its Run handler validates the arguments and every input file, then
+// transcribes each input in turn, optionally aligns speaker labels obtained
+// from diarization, and writes the formatted transcript to the -o file or to
+// stdout. Diagnostics go to stderr so that stdout carries only transcripts and
+// progress messages. The process exits with status 1 on a validation error
+// or when any input could not be transcribed, formatted, or written.
+var rootCmd = &cobra.Command{
+	Use:   "transcribe",
+	Short: "A CLI tool for transcribing audio files with speaker diarization",
+	Long: `Transcribe is a command-line tool that uses OpenAI's Whisper model to
+transcribe audio files. It supports multiple output formats (text, SRT, JSON)
+and speaker diarization using voice embeddings.
+
+Output file (-o) is required unless --no-write is specified. A single -o path
+holds one transcript, so use --no-write when passing several input files.
+
+Output Formats:
+  srt     SRT subtitle format (default)
+  text    Plain text with timestamps
+  json    JSON with full metadata
+
+Whisper Models (--model, -m):
+  tiny    Fastest, least accurate (default)
+  base    Fast, basic accuracy
+  small   Balanced speed/accuracy
+  medium  Good accuracy, slower
+  large   Best accuracy, slowest
+  turbo   Optimized for speed
+
+Examples:
+  # Basic transcription to SRT
+  transcribe audio.mp3 -o output.srt
+
+  # Use a larger model
+  transcribe audio.mp3 --model small -o output.srt
+
+  # Output as plain text
+  transcribe audio.mp3 --format text -o output.txt
+
+  # Enable speaker diarization
+  transcribe audio.mp3 --diarize -o output.srt
+
+  # Print to stdout instead of file
+  transcribe audio.mp3 --no-write
+
+  # Full example: diarization + specific model
+  transcribe audio.mp3 --model small --diarize -s 2 -o output.srt`,
+	Run: func(cmd *cobra.Command, args []string) {
+		// fail reports a fatal usage or validation error on stderr and exits
+		// with a non-zero status.
+		fail := func(format string, a ...any) {
+			fmt.Fprintf(os.Stderr, format, a...)
+			os.Exit(1)
+		}
+
+		if len(args) == 0 {
+			fmt.Fprintln(os.Stderr, "Please provide audio files to transcribe")
+			// Best-effort: a failure to print help must not mask the usage error.
+			_ = cmd.Help()
+			os.Exit(1)
+		}
+
+		// Require output file unless --no-write is set
+		if outputFile == "" && !noWrite {
+			fail("✗ Error: Output file required. Use -o <file> to specify output, or --no-write to print to stdout.\n")
+		}
+
+		// Every input would be written to the same -o path, so only the last
+		// transcript would survive. Refuse up front instead of silently
+		// discarding earlier results after paying for their transcription.
+		if outputFile != "" && len(args) > 1 {
+			fail("✗ Error: %d input files given but -o names a single output file. Transcribe one file per run, or drop -o and use --no-write to print to stdout.\n", len(args))
+		}
+
+		// Validate all provided files before starting any expensive work
+		for _, file := range args {
+			if _, err := os.Stat(file); err != nil {
+				if os.IsNotExist(err) {
+					fail("✗ Error: File '%s' does not exist\n", file)
+				}
+				// Any other stat failure (e.g. permissions) is reported too
+				// rather than being ignored.
+				fail("✗ Error: Cannot access file '%s': %v\n", file, err)
+			}
+
+			if _, err := audio.NewAudioFile(file); err != nil {
+				fail("✗ Error: File '%s' has unsupported format or error: %v\n", file, err)
+			}
+		}
+
+		// Create whisper client and transcribe
+		whisperClient := whisper.NewClient(whisper.ModelSize(modelSize))
+		whisperOptions := whisper.DefaultTranscriptionOptions()
+
+		// Create diarization client if needed
+		var diarizationClient *diarization.Client
+		var diarizationOptions *diarization.DiarizationOptions
+		if diarize {
+			diarizationClient = diarization.NewClient()
+			diarizationOptions = &diarization.DiarizationOptions{
+				NumSpeakers: numSpeakers,
+			}
+		}
+
+		// Create output formatter
+		formatter := output.NewFormatter(output.FormatType(outputFormat))
+
+		// failed records whether any input could not be fully processed, so the
+		// exit status reflects it once the remaining inputs have been tried.
+		failed := false
+
+		for _, file := range args {
+			// Transcription with spinner
+			spinner := progress.NewSpinner(fmt.Sprintf("Transcribing %s (model: %s)...", file, modelSize))
+			spinner.Start()
+			result, err := whisperClient.Transcribe(file, whisperOptions)
+			if err != nil {
+				spinner.StopWithMessage(fmt.Sprintf("✗ Error transcribing %s: %v", file, err))
+				failed = true
+				continue
+			}
+			spinner.StopWithMessage(fmt.Sprintf("✓ Transcribed %s (%.1fs audio)", file, result.Duration))
+
+			// Run diarization if enabled
+			if diarize {
+				speakerSpinner := progress.NewSpinner("Detecting speakers...")
+				speakerSpinner.Start()
+				diarizationResult, err := diarizationClient.Diarize(file, diarizationOptions)
+				if err != nil {
+					// Best-effort: the transcript is still emitted, just
+					// without speaker labels.
+					speakerSpinner.StopWithMessage(fmt.Sprintf("✗ Diarization failed: %v", err))
+				} else {
+					speakerSpinner.StopWithMessage(fmt.Sprintf("✓ Detected %d speaker(s)", diarizationResult.NumSpeakers))
+					diarization.AlignSpeakers(result, diarizationResult)
+				}
+			}
+
+			// Format output
+			formattedOutput, err := formatter.Format(result)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err)
+				failed = true
+				continue
+			}
+
+			// Write to stdout when no output file was requested
+			if outputFile == "" {
+				fmt.Printf("\n%s\n", formattedOutput)
+				continue
+			}
+
+			// Otherwise write to the output file (-o wins if --no-write is also set)
+			if err := os.WriteFile(outputFile, []byte(formattedOutput), 0644); err != nil {
+				fmt.Fprintf(os.Stderr, "✗ Error writing output file: %v\n", err)
+				failed = true
+				continue
+			}
+			fmt.Printf("✓ Saved to %s\n", outputFile)
+		}
+
+		if failed {
+			os.Exit(1)
+		}
+	},
+}
+
+// init copies the package-level Version into the root command and registers
+// the root command's persistent flags, binding each one to its package-level
+// variable.
+func init() {
+	rootCmd.Version = Version
+
+	flags := rootCmd.PersistentFlags()
+	// The output path is only mandatory when --no-write is absent, so the help
+	// text says so instead of claiming it is always required.
+	flags.StringVarP(&outputFile, "output", "o", "", "Output file path (required unless --no-write is set)")
+	flags.StringVarP(&outputFormat, "format", "f", "srt", "Output format: text, srt, json")
+	flags.BoolVar(&diarize, "diarize", false, "Enable speaker diarization")
+	flags.IntVarP(&numSpeakers, "speakers", "s", 0, "Number of speakers (0 = auto-detect)")
+	flags.StringVarP(&modelSize, "model", "m", "tiny", "Whisper model: tiny, base, small, medium, large, turbo")
+	flags.BoolVar(&noWrite, "no-write", false, "Print output to stdout instead of file")
+}
+
+// Execute runs the root command. If the command returns an error, the error
+// is written to stderr and the process exits with status 1.
+func Execute() {
+	if err := rootCmd.Execute(); err != nil {
+		// Diagnostics belong on stderr so they never mix with transcript
+		// output written to stdout.
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}