// transcribe/cmd/root.go

package cmd

import (
	"fmt"
	"os"

	"transcribe/internal/diarization"
	"transcribe/internal/whisper"
	"transcribe/pkg/audio"
	"transcribe/pkg/output"
	"transcribe/pkg/progress"

	"github.com/spf13/cobra"
)

// Command-line state shared by the root command. Apart from Version, every
// variable here is bound to a flag in init and read by rootCmd's Run function.
var (
	// Version is the version string copied onto rootCmd in init. It defaults
	// to "dev".
	// NOTE(review): presumably overridden at build time (e.g. -ldflags -X);
	// confirm against the build scripts.
	Version = "dev"

	// outputFile is the destination path given with --output / -o.
	outputFile string

	// outputFormat is the value of --format / -f; it is passed to
	// output.NewFormatter as an output.FormatType.
	outputFormat string

	// diarize reports whether --diarize was given.
	diarize bool

	// numSpeakers is the value of --speakers / -s, forwarded to the
	// diarization options.
	numSpeakers int

	// modelSize is the value of --model / -m, passed to whisper.NewClient
	// as a whisper.ModelSize.
	modelSize string

	// noWrite reports whether --no-write was given.
	noWrite bool
)

// rootCmd is the base command, run when transcribe is invoked without any
// subcommand. Its positional arguments are the audio files to process.
//
// Run works in two phases:
//
//  1. Validation. At least one input is required; an output path (-o) is
//     required unless --no-write is set; a single -o path is rejected when
//     several inputs are given; every input must be accessible and accepted
//     by audio.NewAudioFile. Any violation exits with status 1 before any
//     transcription starts.
//  2. Processing. Each input is transcribed, optionally diarized, formatted,
//     and then written to the -o path or, when no -o path is set, printed to
//     stdout. A failure on one input does not stop the remaining inputs, but
//     the process exits with status 1 once the loop is done.
//
// Error messages printed directly by this command go to stderr so they do
// not mix with a transcript printed to stdout.
var rootCmd = &cobra.Command{
	Use:   "transcribe",
	Short: "A CLI tool for transcribing audio files with speaker diarization",
	Long: `Transcribe is a command-line tool that uses OpenAI's Whisper model to
transcribe audio files. It supports multiple output formats (text, SRT, JSON)
and speaker diarization using voice embeddings.

Output file (-o) is required unless --no-write is specified.

Output Formats:
  srt     SRT subtitle format (default)
  text    Plain text with timestamps
  json    JSON with full metadata

Whisper Models (--model, -m):
  tiny    Fastest, least accurate (default)
  base    Fast, basic accuracy
  small   Balanced speed/accuracy
  medium  Good accuracy, slower
  large   Best accuracy, slowest
  turbo   Optimized for speed

Examples:
  # Basic transcription to SRT
  transcribe audio.mp3 -o output.srt

  # Use a larger model
  transcribe audio.mp3 --model small -o output.srt

  # Output as plain text
  transcribe audio.mp3 --format text -o output.txt

  # Enable speaker diarization
  transcribe audio.mp3 --diarize -o output.srt

  # Print to stdout instead of file
  transcribe audio.mp3 --no-write

  # Full example: diarization + specific model
  transcribe audio.mp3 --model small --diarize -s 2 -o output.srt`,
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) == 0 {
			fmt.Fprintln(os.Stderr, "Please provide audio files to transcribe")
			// Best effort: there is nothing useful to do if printing help fails.
			_ = cmd.Help()
			os.Exit(1)
		}

		// Require output file unless --no-write is set
		if outputFile == "" && !noWrite {
			fmt.Fprintln(os.Stderr, "✗ Error: Output file required. Use -o <file> to specify output, or --no-write to print to stdout.")
			os.Exit(1)
		}

		// Every input is written to the same -o path, so with several inputs
		// each transcript would overwrite the previous one. Refuse up front
		// instead of producing transcripts that are immediately lost.
		if outputFile != "" && len(args) > 1 {
			fmt.Fprintf(os.Stderr, "✗ Error: %d input files given but only one output file ('%s'). Transcribe one file per invocation, or use --no-write without -o to print every transcript to stdout.\n", len(args), outputFile)
			os.Exit(1)
		}

		// Validate all provided files before starting any transcription
		for _, file := range args {
			if _, err := os.Stat(file); err != nil {
				if os.IsNotExist(err) {
					fmt.Fprintf(os.Stderr, "✗ Error: File '%s' does not exist\n", file)
				} else {
					// The path exists but cannot be inspected; report the
					// underlying error rather than a misleading format error.
					fmt.Fprintf(os.Stderr, "✗ Error: Cannot access file '%s': %v\n", file, err)
				}
				os.Exit(1)
			}

			if _, err := audio.NewAudioFile(file); err != nil {
				fmt.Fprintf(os.Stderr, "✗ Error: File '%s' has unsupported format or error: %v\n", file, err)
				os.Exit(1)
			}
		}

		// Create whisper client and transcription options
		whisperClient := whisper.NewClient(whisper.ModelSize(modelSize))
		whisperOptions := whisper.DefaultTranscriptionOptions()

		// Create diarization client if needed
		var diarizationClient *diarization.Client
		var diarizationOptions *diarization.DiarizationOptions
		if diarize {
			diarizationClient = diarization.NewClient()
			diarizationOptions = &diarization.DiarizationOptions{
				NumSpeakers: numSpeakers,
			}
		}

		// Create output formatter
		formatter := output.NewFormatter(output.FormatType(outputFormat))

		// failed records whether any input could not be fully processed, so
		// the exit status reflects it even though later inputs still run.
		failed := false

		for _, file := range args {
			// Transcription with spinner
			spinner := progress.NewSpinner(fmt.Sprintf("Transcribing %s (model: %s)...", file, modelSize))
			spinner.Start()
			result, err := whisperClient.Transcribe(file, whisperOptions)
			if err != nil {
				spinner.StopWithMessage(fmt.Sprintf("✗ Error transcribing %s: %v", file, err))
				failed = true
				continue
			}
			spinner.StopWithMessage(fmt.Sprintf("✓ Transcribed %s (%.1fs audio)", file, result.Duration))

			// Run diarization if enabled
			if diarize {
				diarSpinner := progress.NewSpinner("Detecting speakers...")
				diarSpinner.Start()
				diarizationResult, err := diarizationClient.Diarize(file, diarizationOptions)
				if err != nil {
					// Best effort: the transcript is still emitted, just
					// without speaker alignment, and the run is not marked
					// as failed.
					diarSpinner.StopWithMessage(fmt.Sprintf("✗ Diarization failed: %v", err))
				} else {
					diarSpinner.StopWithMessage(fmt.Sprintf("✓ Detected %d speaker(s)", diarizationResult.NumSpeakers))
					diarization.AlignSpeakers(result, diarizationResult)
				}
			}

			// Format output
			formattedOutput, err := formatter.Format(result)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error formatting output: %v\n", err)
				failed = true
				continue
			}

			// No output path: print the transcript to stdout
			if outputFile == "" {
				fmt.Printf("\n%s\n", formattedOutput)
				continue
			}

			// Write the transcript to the requested file
			if err := os.WriteFile(outputFile, []byte(formattedOutput), 0644); err != nil {
				fmt.Fprintf(os.Stderr, "✗ Error writing output file: %v\n", err)
				failed = true
				continue
			}
			fmt.Printf("✓ Saved to %s\n", outputFile)
		}

		if failed {
			os.Exit(1)
		}
	},
}

// init copies Version onto rootCmd and binds the package-level option
// variables to flags on rootCmd's persistent flag set.
//
// The --output help text states the real rule, matching both the Long
// description and the check in rootCmd's Run function: the flag may be
// omitted when --no-write is set.
func init() {
	rootCmd.Version = Version

	flags := rootCmd.PersistentFlags()
	flags.StringVarP(&outputFile, "output", "o", "", "Output file path (required unless --no-write is set)")
	flags.StringVarP(&outputFormat, "format", "f", "srt", "Output format: text, srt, json")
	flags.BoolVar(&diarize, "diarize", false, "Enable speaker diarization")
	flags.IntVarP(&numSpeakers, "speakers", "s", 0, "Number of speakers (0 = auto-detect)")
	flags.StringVarP(&modelSize, "model", "m", "tiny", "Whisper model: tiny, base, small, medium, large, turbo")
	flags.BoolVar(&noWrite, "no-write", false, "Print output to stdout instead of file")
}

// Execute runs rootCmd and terminates the process with status 1 when cobra
// returns an error.
//
// The error is deliberately not printed here: rootCmd leaves SilenceErrors
// unset, so cobra has already reported it on stderr, and printing it again
// only repeated the same message on stdout.
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}