init commit

2026-01-25 17:13:15 -06:00
commit 1bbfc332d8
27 changed files with 2462 additions and 0 deletions
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -0,0 +1,374 @@
+package app
+
+import (
+	"fmt"
+	"os/exec"
+	"time"
+
+	"github.com/charmbracelet/bubbles/key"
+	tea "github.com/charmbracelet/bubbletea"
+	"github.com/charmbracelet/lipgloss"
+	"playback/internal/audio"
+	"playback/internal/config"
+	"playback/internal/srt"
+	"playback/internal/ui"
+	"playback/internal/ui/header"
+	"playback/internal/ui/transcript"
+	"playback/internal/ui/waveform"
+)
+
+// FocusedView identifies which pane of the UI currently has focus and
+// therefore receives the context-specific key bindings.
+type FocusedView int
+
+const (
+	// FocusWaveform is the zero value and the pane New starts with.
+	FocusWaveform FocusedView = iota
+	// FocusTranscript makes Update forward key messages to the transcript.
+	FocusTranscript
+)
+
+// Model is the root Bubble Tea model. It owns the audio player, the loaded
+// configuration and the header, waveform and transcript components.
+type Model struct {
+	// Configuration
+	config config.Config
+	keys   KeyMap
+
+	// Audio
+	player    *audio.Player
+	audioPath string
+
+	// Transcript
+	transcriptPath string
+
+	// UI components
+	header     header.Model
+	waveform   waveform.Model
+	transcript transcript.Model
+
+	// State
+	focused   FocusedView // pane that receives context-specific keys
+	showHelp  bool        // when true, View renders the help overlay
+	width     int         // terminal width from the last WindowSizeMsg
+	height    int         // terminal height from the last WindowSizeMsg
+	err       error       // when non-nil, View renders only this error
+	statusMsg string      // text shown in the status bar
+	quitting  bool        // set on quit so View renders nothing
+}
+
+// New builds the initial model for the given audio file and transcript path.
+// Configuration comes from config.Load, the key map from DefaultKeyMap, and
+// the waveform pane starts out focused.
+func New(audioPath, transcriptPath string) Model {
+	return Model{
+		config:         config.Load(),
+		keys:           DefaultKeyMap(),
+		player:         audio.NewPlayer(),
+		audioPath:      audioPath,
+		transcriptPath: transcriptPath,
+		header:         header.New(),
+		waveform:       waveform.New(),
+		transcript:     transcript.New(),
+		focused:        FocusWaveform,
+	}
+}
+
+// Init returns the startup commands: load the audio, load the transcript and
+// start the periodic tick.
+func (m Model) Init() tea.Cmd {
+	startup := []tea.Cmd{
+		m.loadAudio(),
+		m.loadTranscript(),
+		m.tickCmd(),
+	}
+	return tea.Batch(startup...)
+}
+
+// loadAudio returns a command that loads m.audioPath into the player and then
+// extracts 200 waveform samples. A load failure is reported as ErrorMsg; a
+// sampling failure is reported through WaveformLoadedMsg.Err.
+func (m Model) loadAudio() tea.Cmd {
+	load := func() tea.Msg {
+		err := m.player.Load(m.audioPath)
+		if err != nil {
+			return ErrorMsg{Err: fmt.Errorf("failed to load audio: %w", err)}
+		}
+
+		// Waveform overview data for the waveform pane.
+		samples, err := m.player.GetSamples(200)
+		if err != nil {
+			return WaveformLoadedMsg{Err: err}
+		}
+		return WaveformLoadedMsg{Samples: samples}
+	}
+	return load
+}
+
+func (m Model) loadTranscript() tea.Cmd {
+	return func() tea.Msg {
+		path := m.transcriptPath
+
+		// Try to find transcript if not specified
+		if path == "" {
+			path = srt.FindTranscript(m.audioPath)
+		}
+
+		// Create temp file if no transcript found
+		if path == "" {
+			var err error
+			path, err = srt.CreateTempTranscript(m.audioPath)
+			if err != nil {
+				return ErrorMsg{Err: fmt.Errorf("failed to create temp transcript: %w", err)}
+			}
+		}
+
+		t, err := srt.Load(path)
+		if err != nil {
+			return ErrorMsg{Err: fmt.Errorf("failed to load transcript: %w", err)}
+		}
+
+		return transcriptLoadedMsg{transcript: t}
+	}
+}
+
+// transcriptLoadedMsg carries a freshly parsed transcript back to Update.
+// It is produced by both loadTranscript and reloadTranscript.
+type transcriptLoadedMsg struct {
+	transcript *srt.Transcript
+}
+
+func (m Model) tickCmd() tea.Cmd {
+	return tea.Tick(100*time.Millisecond, func(t time.Time) tea.Msg {
+		return TickMsg(t)
+	})
+}
+
+// Update is the Bubble Tea update function: it applies msg to the model and
+// returns the updated model plus any follow-up commands.
+//
+// Key messages are handled in three passes: global bindings first, then the
+// seek bindings when the waveform is focused, then the message is forwarded
+// to the transcript component when the transcript is focused. Only the quit
+// and edit bindings return early; every other key falls through all passes.
+func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
+	var cmds []tea.Cmd
+
+	switch msg := msg.(type) {
+	case tea.WindowSizeMsg:
+		// Remember the terminal size and re-flow the components.
+		m.width = msg.Width
+		m.height = msg.Height
+		m.updateLayout()
+
+	case tea.KeyMsg:
+		// Global keys
+		switch {
+		case key.Matches(msg, m.keys.Quit):
+			// Release the audio resources before asking the program to exit.
+			m.quitting = true
+			m.player.Close()
+			return m, tea.Quit
+
+		case key.Matches(msg, m.keys.Help):
+			m.showHelp = !m.showHelp
+
+		case key.Matches(msg, m.keys.PlayPause):
+			m.player.Toggle()
+
+		case key.Matches(msg, m.keys.FocusWaveform):
+			m.focused = FocusWaveform
+			m.waveform.SetFocused(true)
+			m.transcript.SetFocused(false)
+
+		case key.Matches(msg, m.keys.FocusTranscript):
+			m.focused = FocusTranscript
+			m.waveform.SetFocused(false)
+			m.transcript.SetFocused(true)
+
+		case key.Matches(msg, m.keys.EnterEdit):
+			// Editing is only available from the transcript pane; playback
+			// is paused while the external editor runs.
+			if m.focused == FocusTranscript {
+				m.player.Pause()
+				return m, m.launchEditor()
+			}
+		}
+
+		// Context-specific keys
+		if m.focused == FocusWaveform {
+			// Seek errors are not surfaced here.
+			switch {
+			case key.Matches(msg, m.keys.SeekForward):
+				m.player.SeekRelative(m.config.SeekStep)
+			case key.Matches(msg, m.keys.SeekBackward):
+				m.player.SeekRelative(-m.config.SeekStep)
+			case key.Matches(msg, m.keys.SeekForwardBig):
+				m.player.SeekRelative(m.config.BigSeekStep)
+			case key.Matches(msg, m.keys.SeekBackwardBig):
+				m.player.SeekRelative(-m.config.BigSeekStep)
+			}
+		}
+
+		// The transcript component interprets its own navigation keys.
+		if m.focused == FocusTranscript {
+			cmd := m.transcript.Update(msg)
+			cmds = append(cmds, cmd)
+		}
+
+	case TickMsg:
+		// Refresh the playback position and schedule the next tick.
+		m.updatePosition()
+		cmds = append(cmds, m.tickCmd())
+
+	case WaveformLoadedMsg:
+		// A waveform failure is non-fatal: it only sets the status message.
+		if msg.Err != nil {
+			m.statusMsg = fmt.Sprintf("Waveform: %v", msg.Err)
+		} else {
+			data := audio.NewWaveformData(msg.Samples)
+			m.waveform.SetSamples(data)
+			m.waveform.SetDuration(m.player.Duration())
+		}
+
+	case transcriptLoadedMsg:
+		// Adopt the loaded transcript's path so the header shows the file
+		// that was actually opened (it may be a discovered or temp file).
+		m.transcript.SetTranscript(msg.transcript)
+		m.transcriptPath = msg.transcript.FilePath
+		m.header.SetPaths(m.audioPath, m.transcriptPath, msg.transcript.IsTemp)
+
+	case ErrorMsg:
+		// Stored errors replace the whole UI in View.
+		m.err = msg.Err
+
+	case SavedMsg:
+		if msg.Err != nil {
+			m.statusMsg = fmt.Sprintf("Save failed: %v", msg.Err)
+		} else {
+			m.statusMsg = fmt.Sprintf("Saved to %s", msg.Path)
+		}
+
+	case VimExitedMsg:
+		// After a successful edit, re-read the file to pick up the changes.
+		if msg.Err != nil {
+			m.statusMsg = fmt.Sprintf("Editor error: %v", msg.Err)
+		} else {
+			return m, m.reloadTranscript(msg.Path)
+		}
+
+	case transcript.SeekToCueMsg:
+		// The seek error is ignored here.
+		m.player.Seek(msg.Position)
+	}
+
+	return m, tea.Batch(cmds...)
+}
+
+// updatePosition pushes the player's current position into the waveform (as
+// a fraction of the duration) and the transcript (as an absolute time). The
+// waveform is left untouched while the duration is zero.
+func (m *Model) updatePosition() {
+	elapsed, total := m.player.Position(), m.player.Duration()
+
+	if total > 0 {
+		fraction := float64(elapsed) / float64(total)
+		m.waveform.SetPosition(fraction)
+	}
+
+	m.transcript.SetPosition(elapsed)
+}
+
+func (m *Model) updateLayout() {
+	// Header: 2 lines
+	headerHeight := 2
+
+	// Waveform: 5 lines
+	waveformHeight := 5
+
+	// Status bar: 1 line
+	statusHeight := 1
+
+	// Transcript: remaining space
+	transcriptHeight := m.height - headerHeight - waveformHeight - statusHeight - 2
+
+	m.header.SetWidth(m.width)
+	m.waveform.SetSize(m.width, waveformHeight)
+	m.transcript.SetSize(m.width, transcriptHeight)
+
+	m.waveform.SetFocused(m.focused == FocusWaveform)
+	m.transcript.SetFocused(m.focused == FocusTranscript)
+}
+
+// launchEditor returns a command that runs the configured editor on the
+// transcript file, passing "+<line>" so it opens at the selected cue. It
+// returns nil when no transcript is loaded. The editor's exit is reported
+// as VimExitedMsg.
+func (m Model) launchEditor() tea.Cmd {
+	current := m.transcript.Transcript()
+	if current == nil {
+		return nil
+	}
+
+	jump := fmt.Sprintf("+%d", m.transcript.SelectedCueLineNumber())
+	editor := exec.Command(m.config.Editor, jump, current.FilePath)
+
+	onExit := func(err error) tea.Msg {
+		return VimExitedMsg{Path: current.FilePath, Err: err}
+	}
+	return tea.ExecProcess(editor, onExit)
+}
+
+// reloadTranscript returns a command that re-parses the transcript at path,
+// yielding transcriptLoadedMsg on success and ErrorMsg on failure.
+func (m Model) reloadTranscript(path string) tea.Cmd {
+	reload := func() tea.Msg {
+		fresh, err := srt.Load(path)
+		if err == nil {
+			return transcriptLoadedMsg{transcript: fresh}
+		}
+		return ErrorMsg{Err: err}
+	}
+	return reload
+}
+
+// View renders the whole screen. In priority order it shows nothing while
+// quitting, the stored error, the help overlay, or the regular layout of
+// header, blank line, waveform, transcript and status bar.
+func (m Model) View() string {
+	switch {
+	case m.quitting:
+		return ""
+	case m.err != nil:
+		return ui.ErrorStyle.Render(fmt.Sprintf("Error: %v\n\nPress 'q' to quit.", m.err))
+	case m.showHelp:
+		return m.renderHelp()
+	}
+
+	// Sections from top to bottom.
+	sections := []string{
+		m.header.View(m.player.Position(), m.player.Duration(), m.player.IsPlaying()),
+		"",
+		m.waveform.View(),
+		m.transcript.View(),
+		m.renderStatus(),
+	}
+
+	return lipgloss.JoinVertical(lipgloss.Left, sections...)
+}
+
+// renderStatus builds the one-line status bar: transcript mode, focused
+// pane, status message, and a help hint pushed right by a spacer.
+func (m Model) renderStatus() string {
+	// Focus indicator text.
+	focusName := "Waveform"
+	if m.focused == FocusTranscript {
+		focusName = "Transcript"
+	}
+
+	mode := ui.ModeStyle.Render(m.transcript.ModeString())
+	focus := ui.BaseStyle.Render(fmt.Sprintf("[%s]", focusName))
+	statusMsg := ui.StatusBarStyle.Render(m.statusMsg)
+	helpHint := ui.HelpDescStyle.Render("Press ? for help")
+
+	// The spacer takes whatever width the four segments leave, minus 8.
+	used := lipgloss.Width(mode) + lipgloss.Width(focus) + lipgloss.Width(statusMsg) + lipgloss.Width(helpHint)
+	spacer := lipgloss.NewStyle().Width(m.width - used - 8).Render("")
+
+	return lipgloss.JoinHorizontal(
+		lipgloss.Center,
+		mode, "  ", focus, "  ", statusMsg, spacer, helpHint,
+	)
+}
+
+// renderHelp renders the key-binding help inside a rounded, 60-column box
+// centred in the terminal.
+func (m Model) renderHelp() string {
+	box := lipgloss.NewStyle().
+		Border(lipgloss.RoundedBorder()).
+		BorderForeground(ui.ColorPrimary).
+		Padding(1, 2).
+		Width(60)
+
+	body := box.Render(m.keys.HelpView())
+
+	return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, body)
+}
--- a/internal/app/keys.go
+++ b/internal/app/keys.go
@@ -0,0 +1,130 @@
+package app
+
+import "github.com/charmbracelet/bubbles/key"
+
+// KeyMap groups every key binding the application knows about, by the
+// context in which the binding applies.
+type KeyMap struct {
+	// Global
+	Quit      key.Binding
+	Help      key.Binding
+	PlayPause key.Binding
+
+	// Focus switching
+	FocusWaveform   key.Binding
+	FocusTranscript key.Binding
+
+	// Waveform navigation
+	SeekForward     key.Binding
+	SeekBackward    key.Binding
+	SeekForwardBig  key.Binding
+	SeekBackwardBig key.Binding
+
+	// Transcript navigation
+	ScrollUp   key.Binding
+	ScrollDown key.Binding
+	PageUp     key.Binding
+	PageDown   key.Binding
+	GoTop      key.Binding
+	GoBottom   key.Binding
+
+	// Editing
+	EnterEdit key.Binding
+}
+
+// DefaultKeyMap returns the default keybindings
+func DefaultKeyMap() KeyMap {
+	return KeyMap{
+		Quit: key.NewBinding(
+			key.WithKeys("q", "ctrl+c"),
+			key.WithHelp("q", "quit"),
+		),
+		Help: key.NewBinding(
+			key.WithKeys("?"),
+			key.WithHelp("?", "help"),
+		),
+		PlayPause: key.NewBinding(
+			key.WithKeys(" "),
+			key.WithHelp("space", "play/pause"),
+		),
+		FocusWaveform: key.NewBinding(
+			key.WithKeys("ctrl+k"),
+			key.WithHelp("ctrl+k", "focus waveform"),
+		),
+		FocusTranscript: key.NewBinding(
+			key.WithKeys("ctrl+j"),
+			key.WithHelp("ctrl+j", "focus transcript"),
+		),
+		SeekForward: key.NewBinding(
+			key.WithKeys("l", "right"),
+			key.WithHelp("l/→", "seek forward"),
+		),
+		SeekBackward: key.NewBinding(
+			key.WithKeys("h", "left"),
+			key.WithHelp("h/←", "seek backward"),
+		),
+		SeekForwardBig: key.NewBinding(
+			key.WithKeys("L", "shift+right"),
+			key.WithHelp("L", "seek forward (big)"),
+		),
+		SeekBackwardBig: key.NewBinding(
+			key.WithKeys("H", "shift+left"),
+			key.WithHelp("H", "seek backward (big)"),
+		),
+		ScrollUp: key.NewBinding(
+			key.WithKeys("k", "up"),
+			key.WithHelp("k/↑", "scroll up"),
+		),
+		ScrollDown: key.NewBinding(
+			key.WithKeys("j", "down"),
+			key.WithHelp("j/↓", "scroll down"),
+		),
+		PageUp: key.NewBinding(
+			key.WithKeys("ctrl+u"),
+			key.WithHelp("ctrl+u", "page up"),
+		),
+		PageDown: key.NewBinding(
+			key.WithKeys("ctrl+d"),
+			key.WithHelp("ctrl+d", "page down"),
+		),
+		GoTop: key.NewBinding(
+			key.WithKeys("g"),
+			key.WithHelp("gg", "go to top"),
+		),
+		GoBottom: key.NewBinding(
+			key.WithKeys("G"),
+			key.WithHelp("G", "go to bottom"),
+		),
+		EnterEdit: key.NewBinding(
+			key.WithKeys("i"),
+			key.WithHelp("i", "edit transcript"),
+		),
+	}
+}
+
+// HelpView returns the help overlay text.
+//
+// The text is a fixed string: it is not generated from the bindings in k, so
+// the keys and the seek step sizes listed here must be kept in sync by hand
+// with DefaultKeyMap and the configuration defaults.
+func (k KeyMap) HelpView() string {
+	return `Keybindings:
+
+Global:
+  space       Play/Pause
+  ctrl+j      Focus transcript
+  ctrl+k      Focus waveform
+  q           Quit
+  ?           Toggle help
+
+Waveform (when focused):
+  h / ←       Seek backward (5s)
+  l / →       Seek forward (5s)
+  H           Seek backward (30s)
+  L           Seek forward (30s)
+
+Transcript (when focused):
+  j / ↓       Next cue
+  k / ↑       Previous cue
+  ctrl+d      Jump 5 cues down
+  ctrl+u      Jump 5 cues up
+  g           Go to first cue
+  G           Go to last cue
+  enter       Seek audio to cue
+  i           Edit in $EDITOR at cue`
+}
--- a/internal/app/messages.go
+++ b/internal/app/messages.go
@@ -0,0 +1,29 @@
+package app
+
+import "time"
+
+// TickMsg is sent periodically to update playback position. It carries the
+// time at which the tick fired.
+type TickMsg time.Time
+
+// WaveformLoadedMsg is sent when waveform extraction finishes. Exactly one
+// of Samples or Err is populated by the sender.
+type WaveformLoadedMsg struct {
+	Samples []float64 // amplitude per waveform bucket
+	Err     error     // non-nil when sample extraction failed
+}
+
+// ErrorMsg reports an error that the model stores and shows in place of
+// the regular UI.
+type ErrorMsg struct {
+	Err error
+}
+
+// SavedMsg is sent when a transcript save completes.
+type SavedMsg struct {
+	Path string // destination that was written
+	Err  error  // non-nil when the save failed
+}
+
+// VimExitedMsg is sent when the external editor process exits.
+type VimExitedMsg struct {
+	Path string // transcript file that was opened in the editor
+	Err  error  // error reported for the editor process, if any
+}
--- a/internal/audio/formats.go
+++ b/internal/audio/formats.go
@@ -0,0 +1,60 @@
+package audio
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// AudioFormat enumerates the audio container formats the player can decode.
+type AudioFormat int
+
+const (
+	// FormatUnknown is the zero value, returned for unrecognised extensions.
+	FormatUnknown AudioFormat = iota
+	FormatMP3
+	FormatWAV
+	FormatFLAC
+	FormatOGG
+)
+
+// DetectFormat maps the file extension of path, compared case-insensitively,
+// to an AudioFormat. The file contents are never inspected. Unrecognised
+// extensions yield FormatUnknown.
+func DetectFormat(path string) AudioFormat {
+	switch strings.ToLower(filepath.Ext(path)) {
+	case ".mp3":
+		return FormatMP3
+	case ".wav":
+		return FormatWAV
+	case ".flac":
+		return FormatFLAC
+	case ".ogg":
+		return FormatOGG
+	}
+	return FormatUnknown
+}
+
+// String returns the upper-case name of the format, or "Unknown" for
+// FormatUnknown and any other value without a name.
+func (f AudioFormat) String() string {
+	names := map[AudioFormat]string{
+		FormatMP3:  "MP3",
+		FormatWAV:  "WAV",
+		FormatFLAC: "FLAC",
+		FormatOGG:  "OGG",
+	}
+	if name, known := names[f]; known {
+		return name
+	}
+	return "Unknown"
+}
+
+// ValidateFormat returns nil when the extension of path maps to a supported
+// format, and otherwise an error naming the offending extension.
+func ValidateFormat(path string) error {
+	if DetectFormat(path) != FormatUnknown {
+		return nil
+	}
+	return fmt.Errorf("unsupported audio format: %s", filepath.Ext(path))
+}
--- a/internal/audio/player.go
+++ b/internal/audio/player.go
@@ -0,0 +1,313 @@
+package audio
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/gopxl/beep/v2"
+	"github.com/gopxl/beep/v2/flac"
+	"github.com/gopxl/beep/v2/mp3"
+	"github.com/gopxl/beep/v2/speaker"
+	"github.com/gopxl/beep/v2/vorbis"
+	"github.com/gopxl/beep/v2/wav"
+)
+
+// Player wraps a single decoded audio file and controls its playback through
+// the beep speaker. All methods take mu, so a Player must be used through a
+// pointer and never copied.
+type Player struct {
+	mu sync.Mutex
+
+	filePath  string                // path passed to the last successful Load
+	file      *os.File              // open handle backing streamer
+	streamer  beep.StreamSeekCloser // decoder for the loaded file
+	format    beep.Format           // format reported by the decoder
+	ctrl      *beep.Ctrl            // pause/resume wrapper around streamer
+	resampler *beep.Resampler       // set only when the source is not 44.1 kHz
+
+	playing     bool // mirrors !ctrl.Paused
+	initialized bool // true once speaker.Init has succeeded
+}
+
+// NewPlayer returns an empty player; call Load before any playback method.
+func NewPlayer() *Player {
+	return new(Player)
+}
+
+// Load opens the audio file at path, decodes it according to its file
+// extension and registers it with the speaker in a paused state.
+//
+// Previously loaded audio is released first and removed from the speaker.
+// The speaker is initialized once, at 44.1 kHz; sources with a different
+// sample rate are resampled. If Load fails, the player is left with no audio
+// loaded rather than with references to closed resources.
+func (p *Player) Load(path string) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	// Release any existing audio. Clearing the speaker first stops it from
+	// pulling on the old streamer and keeps a paused Ctrl from lingering in
+	// the mixer.
+	if p.file != nil {
+		speaker.Clear()
+		p.streamer.Close()
+		p.file.Close()
+		p.file, p.streamer, p.ctrl, p.resampler = nil, nil, nil, nil
+	}
+	// Newly loaded audio always starts paused.
+	p.playing = false
+
+	format := DetectFormat(path)
+	if format == FormatUnknown {
+		return fmt.Errorf("unsupported audio format")
+	}
+
+	file, err := os.Open(path)
+	if err != nil {
+		return fmt.Errorf("failed to open audio file: %w", err)
+	}
+
+	var streamer beep.StreamSeekCloser
+	var audioFormat beep.Format
+
+	switch format {
+	case FormatMP3:
+		streamer, audioFormat, err = mp3.Decode(file)
+	case FormatWAV:
+		streamer, audioFormat, err = wav.Decode(file)
+	case FormatFLAC:
+		streamer, audioFormat, err = flac.Decode(file)
+	case FormatOGG:
+		streamer, audioFormat, err = vorbis.Decode(file)
+	}
+
+	if err != nil {
+		file.Close()
+		return fmt.Errorf("failed to decode audio: %w", err)
+	}
+
+	// Initialize the speaker on first use, before any state is committed,
+	// so a failure here leaves nothing half-loaded.
+	targetRate := beep.SampleRate(44100)
+	if !p.initialized {
+		if err := speaker.Init(targetRate, targetRate.N(time.Second/10)); err != nil {
+			streamer.Close()
+			file.Close()
+			return fmt.Errorf("failed to initialize speaker: %w", err)
+		}
+		p.initialized = true
+	}
+
+	p.filePath = path
+	p.file = file
+	p.streamer = streamer
+	p.format = audioFormat
+	p.ctrl = &beep.Ctrl{Streamer: streamer, Paused: true}
+
+	// Resample only when the source rate differs from the speaker rate.
+	if audioFormat.SampleRate != targetRate {
+		p.resampler = beep.Resample(4, audioFormat.SampleRate, targetRate, p.ctrl)
+		speaker.Play(p.resampler)
+	} else {
+		p.resampler = nil
+		speaker.Play(p.ctrl)
+	}
+
+	return nil
+}
+
+// Play starts or resumes playback. It does nothing when no audio is loaded.
+func (p *Player) Play() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.ctrl != nil {
+		// The speaker reads ctrl from its own goroutine, hence the lock.
+		speaker.Lock()
+		p.ctrl.Paused = false
+		speaker.Unlock()
+		p.playing = true
+	}
+}
+
+// Pause suspends playback. It does nothing when no audio is loaded.
+func (p *Player) Pause() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.ctrl != nil {
+		// The speaker reads ctrl from its own goroutine, hence the lock.
+		speaker.Lock()
+		p.ctrl.Paused = true
+		speaker.Unlock()
+		p.playing = false
+	}
+}
+
+// Toggle switches between playing and paused. It does nothing when no audio
+// is loaded.
+//
+// The state is read and flipped under a single hold of the player mutex, so
+// two concurrent callers cannot both observe the same state and apply the
+// same transition.
+func (p *Player) Toggle() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.ctrl == nil {
+		return
+	}
+
+	// Currently playing -> pause; currently paused -> resume.
+	pause := p.playing
+
+	speaker.Lock()
+	p.ctrl.Paused = pause
+	speaker.Unlock()
+
+	p.playing = !pause
+}
+
+// IsPlaying reports whether playback is currently running.
+func (p *Player) IsPlaying() bool {
+	p.mu.Lock()
+	playing := p.playing
+	p.mu.Unlock()
+	return playing
+}
+
+// Position returns the current playback position as a duration, or 0 when
+// no audio is loaded.
+func (p *Player) Position() time.Duration {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.streamer == nil {
+		return 0
+	}
+
+	// Read the sample offset while the speaker cannot advance it.
+	speaker.Lock()
+	defer speaker.Unlock()
+	return p.format.SampleRate.D(p.streamer.Position())
+}
+
+// Duration returns the total duration
+func (p *Player) Duration() time.Duration {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.streamer == nil {
+		return 0
+	}
+
+	return p.format.SampleRate.D(p.streamer.Len())
+}
+
+// Seek moves playback to pos, clamped to the range [0, total length]. It
+// returns nil without doing anything when no audio is loaded, and otherwise
+// the error reported by the underlying streamer.
+func (p *Player) Seek(pos time.Duration) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.streamer == nil {
+		return nil
+	}
+
+	// Convert to a sample offset and clamp it into range.
+	target := p.format.SampleRate.N(pos)
+	switch last := p.streamer.Len(); {
+	case target < 0:
+		target = 0
+	case target > last:
+		target = last
+	}
+
+	speaker.Lock()
+	defer speaker.Unlock()
+	return p.streamer.Seek(target)
+}
+
+// SeekRelative moves playback by delta from the current position; a negative
+// delta seeks backward. Clamping is done by Seek.
+func (p *Player) SeekRelative(delta time.Duration) error {
+	return p.Seek(p.Position() + delta)
+}
+
+// Close stops playback and releases the decoder and file handle. Calling it
+// more than once is safe. Afterwards the player reports no audio loaded.
+func (p *Player) Close() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	// Hold the speaker lock so the audio goroutine cannot read from the
+	// streamer while it is being closed.
+	speaker.Lock()
+	defer speaker.Unlock()
+
+	if p.ctrl != nil {
+		// A Ctrl whose Streamer is nil is treated as drained by beep, which
+		// stops the speaker from pulling on the closed decoder.
+		p.ctrl.Streamer = nil
+		p.ctrl = nil
+	}
+	if p.streamer != nil {
+		p.streamer.Close()
+		p.streamer = nil
+	}
+	if p.file != nil {
+		p.file.Close()
+		p.file = nil
+	}
+	p.resampler = nil
+	p.playing = false
+}
+
+// Format returns the format reported by the decoder for the loaded audio.
+func (p *Player) Format() beep.Format {
+	p.mu.Lock()
+	format := p.format
+	p.mu.Unlock()
+	return format
+}
+
+// Streamer returns the underlying streamer (for waveform extraction). The
+// result is nil when no audio is loaded.
+func (p *Player) Streamer() beep.StreamSeekCloser {
+	p.mu.Lock()
+	streamer := p.streamer
+	p.mu.Unlock()
+	return streamer
+}
+
+// GetSamples extracts sample data for waveform visualization.
+//
+// The audio is divided into numSamples equal buckets and each result entry
+// is the mean absolute amplitude of its bucket, with the two channels
+// averaged. Buckets that cannot be read (seek failure or end of stream) are
+// left at zero. The playback position is restored before returning.
+//
+// It returns an error when numSamples is not positive, when no audio is
+// loaded, when the audio is empty, or when the decoder reports an error
+// other than io.EOF.
+func (p *Player) GetSamples(numSamples int) ([]float64, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	// Guard the division and allocation below.
+	if numSamples <= 0 {
+		return nil, fmt.Errorf("invalid sample count: %d", numSamples)
+	}
+
+	if p.streamer == nil {
+		return nil, fmt.Errorf("no audio loaded")
+	}
+
+	totalSamples := p.streamer.Len()
+	if totalSamples == 0 {
+		return nil, fmt.Errorf("empty audio")
+	}
+
+	samples := make([]float64, numSamples)
+	samplesPerBucket := totalSamples / numSamples
+	if samplesPerBucket < 1 {
+		samplesPerBucket = 1
+	}
+
+	// Save the current position and restore it on every exit path.
+	speaker.Lock()
+	currentPos := p.streamer.Position()
+	speaker.Unlock()
+	defer func() {
+		speaker.Lock()
+		p.streamer.Seek(currentPos)
+		speaker.Unlock()
+	}()
+
+	buf := make([][2]float64, samplesPerBucket)
+
+	for i := 0; i < numSamples; i++ {
+		targetPos := i * samplesPerBucket
+		if targetPos >= totalSamples {
+			break
+		}
+
+		// Seek and read under one hold of the speaker lock so the speaker
+		// cannot advance the streamer between the two calls.
+		var n int
+		var ok bool
+		speaker.Lock()
+		seekErr := p.streamer.Seek(targetPos)
+		if seekErr == nil {
+			n, ok = p.streamer.Stream(buf)
+		}
+		speaker.Unlock()
+
+		if seekErr != nil || !ok || n == 0 {
+			// A genuine decoder failure is reported; running off the end of
+			// the stream just leaves this bucket at zero.
+			if err := p.streamer.Err(); err != nil && err != io.EOF {
+				return nil, fmt.Errorf("failed to read audio samples: %w", err)
+			}
+			continue
+		}
+
+		// Calculate average absolute amplitude
+		var sum float64
+		for j := 0; j < n; j++ {
+			val := (buf[j][0] + buf[j][1]) / 2
+			if val < 0 {
+				val = -val
+			}
+			sum += val
+		}
+		samples[i] = sum / float64(n)
+	}
+
+	return samples, nil
+}
--- a/internal/audio/waveform.go
+++ b/internal/audio/waveform.go
@@ -0,0 +1,36 @@
+package audio
+
+// WaveformData holds pre-computed waveform samples together with the peak
+// value used to normalize them.
+type WaveformData struct {
+	Samples  []float64 // raw per-bucket amplitudes
+	MaxValue float64   // largest sample, or 1 when every sample is <= 0
+}
+
+// NewWaveformData wraps samples (without copying) and records their peak.
+// When no sample is greater than zero the peak is stored as 1 so that
+// normalization never divides by zero.
+func NewWaveformData(samples []float64) *WaveformData {
+	peak := 0.0
+	for i := range samples {
+		if samples[i] > peak {
+			peak = samples[i]
+		}
+	}
+
+	if peak == 0 {
+		peak = 1 // Avoid division by zero
+	}
+
+	return &WaveformData{Samples: samples, MaxValue: peak}
+}
+
+// Normalized returns a new slice with every sample divided by MaxValue.
+func (w *WaveformData) Normalized() []float64 {
+	scaled := make([]float64, 0, len(w.Samples))
+	for _, sample := range w.Samples {
+		scaled = append(scaled, sample/w.MaxValue)
+	}
+	return scaled
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -0,0 +1,92 @@
+package config
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// Config holds application configuration as used at runtime.
+//
+// Load and Save do not serialize this struct directly; they go through
+// configFile, which stores the durations as millisecond counts.
+type Config struct {
+	SeekStep    time.Duration `json:"seek_step"`     // step for the normal seek keys
+	BigSeekStep time.Duration `json:"big_seek_step"` // step for the big seek keys
+	Volume      float64       `json:"volume"`
+	Theme       string        `json:"theme"`
+	Editor      string        `json:"editor"` // command run to edit the transcript
+}
+
+// configFile is the JSON serialization format. Durations are stored as
+// whole milliseconds; Load ignores zero or empty values and keeps the
+// defaults for them.
+type configFile struct {
+	SeekStepMs    int64   `json:"seek_step_ms"`
+	BigSeekStepMs int64   `json:"big_seek_step_ms"`
+	Volume        float64 `json:"volume"`
+	Theme         string  `json:"theme"`
+	Editor        string  `json:"editor"`
+}
+
+// configPath returns the location of the config file: playback/config.json
+// under the user's config directory, or under $HOME when that directory
+// cannot be determined.
+func configPath() string {
+	base, err := os.UserConfigDir()
+	if err != nil {
+		base = os.Getenv("HOME")
+	}
+	return filepath.Join(base, "playback", "config.json")
+}
+
+// Load returns the configuration stored on disk layered over the defaults.
+// A missing, unreadable or malformed file yields the defaults unchanged.
+// Stored fields that are zero or empty do not override their default.
+func Load() Config {
+	cfg := DefaultConfig()
+
+	raw, err := os.ReadFile(configPath())
+	if err != nil {
+		return cfg
+	}
+
+	var stored configFile
+	if json.Unmarshal(raw, &stored) != nil {
+		return cfg
+	}
+
+	if ms := stored.SeekStepMs; ms > 0 {
+		cfg.SeekStep = time.Duration(ms) * time.Millisecond
+	}
+	if ms := stored.BigSeekStepMs; ms > 0 {
+		cfg.BigSeekStep = time.Duration(ms) * time.Millisecond
+	}
+	if vol := stored.Volume; vol > 0 {
+		cfg.Volume = vol
+	}
+	if theme := stored.Theme; theme != "" {
+		cfg.Theme = theme
+	}
+	if editor := stored.Editor; editor != "" {
+		cfg.Editor = editor
+	}
+
+	return cfg
+}
+
+// Save writes the configuration to the config file as indented JSON,
+// creating the containing directory when needed.
+func (c Config) Save() error {
+	data, err := json.MarshalIndent(configFile{
+		SeekStepMs:    c.SeekStep.Milliseconds(),
+		BigSeekStepMs: c.BigSeekStep.Milliseconds(),
+		Volume:        c.Volume,
+		Theme:         c.Theme,
+		Editor:        c.Editor,
+	}, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	target := configPath()
+
+	// The config directory may not exist on first save.
+	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
+		return err
+	}
+
+	return os.WriteFile(target, data, 0644)
+}
--- a/internal/config/defaults.go
+++ b/internal/config/defaults.go
@@ -0,0 +1,22 @@
+package config
+
+import "time"
+
+// Default configuration values, used by DefaultConfig and therefore for any
+// setting the config file does not override.
+const (
+	DefaultSeekStep    = 5 * time.Second
+	DefaultBigSeekStep = 30 * time.Second
+	DefaultVolume      = 1.0
+	DefaultEditor      = "vim"
+)
+
+// DefaultConfig returns the default configuration
+func DefaultConfig() Config {
+	return Config{
+		SeekStep:    DefaultSeekStep,
+		BigSeekStep: DefaultBigSeekStep,
+		Volume:      DefaultVolume,
+		Theme:       "default",
+		Editor:      DefaultEditor,
+	}
+}
--- a/internal/srt/parser.go
+++ b/internal/srt/parser.go
@@ -0,0 +1,112 @@
+package srt
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	astisub "github.com/asticode/go-astisub"
+)
+
+// Load parses the subtitle file at path into a Transcript.
+//
+// A path ending in ".tmp" marks the transcript as temporary. Each cue's
+// LineNumber is computed, not read from the file: it assumes every cue takes
+// one index line, one timestamp line, its text lines and one blank line.
+func Load(path string) (*Transcript, error) {
+	subs, err := astisub.OpenFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse SRT file: %w", err)
+	}
+
+	cues := make([]Cue, 0, len(subs.Items))
+	nextLine := 1 // SRT files are 1-indexed
+
+	for idx, item := range subs.Items {
+		// Flatten each subtitle line's items into one string per line.
+		lines := make([]string, 0, len(item.Lines))
+		for _, line := range item.Lines {
+			var joined strings.Builder
+			for _, part := range line.Items {
+				joined.WriteString(part.Text)
+			}
+			lines = append(lines, joined.String())
+		}
+		text := strings.Join(lines, "\n")
+
+		cues = append(cues, Cue{
+			Index:      idx + 1,
+			Start:      item.StartAt,
+			End:        item.EndAt,
+			Text:       text,
+			LineNumber: nextLine,
+		})
+
+		// Advance past this cue: index + timestamp + text lines + blank.
+		// Empty text still counts as one text line.
+		textLines := strings.Count(text, "\n") + 1
+		nextLine += 2 + textLines + 1
+	}
+
+	return &Transcript{
+		FilePath: path,
+		IsTemp:   strings.HasSuffix(path, ".tmp"),
+		Cues:     cues,
+	}, nil
+}
+
+// FindTranscript looks for a transcript stored alongside audioPath and
+// returns the first candidate that exists, or "" when none does. The
+// candidates, in order, are <stem>.srt, <stem>.en.srt and <audioPath>.srt,
+// where <stem> is audioPath without its extension.
+func FindTranscript(audioPath string) string {
+	stem := strings.TrimSuffix(audioPath, filepath.Ext(audioPath))
+
+	candidates := [...]string{
+		stem + ".srt",
+		stem + ".en.srt",
+		audioPath + ".srt",
+	}
+
+	for _, candidate := range candidates {
+		if _, err := os.Stat(candidate); err != nil {
+			continue
+		}
+		return candidate
+	}
+
+	return ""
+}
+
+// CreateTempTranscript creates a temporary SRT file with placeholder content
+func CreateTempTranscript(audioPath string) (string, error) {
+	basename := filepath.Base(audioPath)
+	ext := filepath.Ext(basename)
+	nameOnly := strings.TrimSuffix(basename, ext)
+
+	tempPath := filepath.Join(os.TempDir(), nameOnly+".srt.tmp")
+
+	content := fmt.Sprintf(`1
+00:00:00,000 --> 00:00:05,000
+[No transcript found for: %s]
+
+2
+00:00:05,000 --> 00:00:15,000
+This is a temporary transcript file.
+You can edit it using vim-style commands.
+Press 'i' to enter edit mode, 'esc' to exit.
+
+3
+00:00:15,000 --> 00:00:25,000
+To generate a transcript automatically, try:
+https://git.beitzah.net/ysandler/transcribe
+
+4
+00:00:25,000 --> 00:00:35,000
+Or launch with an existing transcript:
+playback %s -t /path/to/transcript.srt
+`, basename, basename)
+
+	if err := os.WriteFile(tempPath, []byte(content), 0644); err != nil {
+		return "", fmt.Errorf("failed to create temp transcript: %w", err)
+	}
+
+	return tempPath, nil
+}
--- a/internal/srt/types.go
+++ b/internal/srt/types.go
@@ -0,0 +1,39 @@
+package srt
+
+import "time"
+
+// Cue represents a single subtitle entry.
+type Cue struct {
+	Index      int           // 1-based sequence number of the cue
+	Start      time.Duration // time at which the cue begins (inclusive)
+	End        time.Duration // time at which the cue ends (exclusive in CueAt)
+	Text       string        // cue text; multiple lines are joined with "\n"
+	LineNumber int           // Line number in the SRT file (1-indexed)
+}
+
+// Transcript represents a complete subtitle file.
+type Transcript struct {
+	Cues     []Cue  // cues in file order
+	FilePath string // path the transcript was loaded from or last saved to
+	IsTemp   bool   // true for a temporary file not yet promoted
+}
+
+// CueAt returns a pointer to the first cue whose [Start, End) interval
+// contains pos, or nil when no cue does. The pointer refers into t.Cues.
+func (t *Transcript) CueAt(pos time.Duration) *Cue {
+	if i := t.CueIndexAt(pos); i >= 0 {
+		return &t.Cues[i]
+	}
+	return nil
+}
+
+// CueIndexAt returns the index in t.Cues of the first cue whose
+// [Start, End) interval contains pos, or -1 when no cue does.
+func (t *Transcript) CueIndexAt(pos time.Duration) int {
+	for i, cue := range t.Cues {
+		if pos < cue.Start || pos >= cue.End {
+			continue
+		}
+		return i
+	}
+	return -1
+}
--- a/internal/srt/writer.go
+++ b/internal/srt/writer.go
@@ -0,0 +1,79 @@
+package srt
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// formatDuration formats a duration as SRT timestamp (HH:MM:SS,mmm)
+func formatDuration(d int64) string {
+	ms := d % 1000
+	d /= 1000
+	s := d % 60
+	d /= 60
+	m := d % 60
+	h := d / 60
+
+	return fmt.Sprintf("%02d:%02d:%02d,%03d", h, m, s, ms)
+}
+
+// Save writes the transcript back to the file recorded in FilePath.
+func (t *Transcript) Save() error {
+	dest := t.FilePath
+	return t.SaveTo(dest)
+}
+
+// SaveTo serializes the transcript in SRT form and writes it to path,
+// creating the parent directory when needed. Cues are separated by a blank
+// line and written with their stored Index values.
+func (t *Transcript) SaveTo(path string) error {
+	var out strings.Builder
+
+	for n := range t.Cues {
+		cue := &t.Cues[n]
+		if n != 0 {
+			out.WriteByte('\n') // blank line between cues
+		}
+		start := formatDuration(cue.Start.Milliseconds())
+		end := formatDuration(cue.End.Milliseconds())
+		fmt.Fprintf(&out, "%d\n%s --> %s\n%s\n", cue.Index, start, end, cue.Text)
+	}
+
+	// The destination directory may not exist yet.
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return fmt.Errorf("failed to create directory: %w", err)
+	}
+
+	if err := os.WriteFile(path, []byte(out.String()), 0644); err != nil {
+		return fmt.Errorf("failed to write file: %w", err)
+	}
+
+	return nil
+}
+
+// PromoteTempFile turns a temporary transcript into a permanent one stored
+// next to the audio file (the audio path with its extension replaced by
+// ".srt") and returns the new path. A transcript that is not temporary is
+// left alone and its current path is returned.
+//
+// The temp file that is removed is located by name from audioPath, not from
+// the transcript's previous FilePath, and a removal failure is ignored.
+func (t *Transcript) PromoteTempFile(audioPath string) (string, error) {
+	if !t.IsTemp {
+		return t.FilePath, nil
+	}
+
+	// Permanent location: alongside the audio file.
+	permanentPath := strings.TrimSuffix(audioPath, filepath.Ext(audioPath)) + ".srt"
+	if err := t.SaveTo(permanentPath); err != nil {
+		return "", err
+	}
+
+	t.FilePath, t.IsTemp = permanentPath, false
+
+	// Best-effort cleanup of the temp file.
+	inTemp := filepath.Join(os.TempDir(), filepath.Base(audioPath))
+	stale := strings.TrimSuffix(inTemp, filepath.Ext(inTemp)) + ".srt.tmp"
+	os.Remove(stale)
+
+	return permanentPath, nil
+}
--- a/internal/ui/header/header.go
+++ b/internal/ui/header/header.go
@@ -0,0 +1,97 @@
+package header
+
+import (
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"github.com/charmbracelet/lipgloss"
+	"playback/internal/ui"
+)
+
+// Model represents the header component. It holds only the file paths and
+// the width; playback state is passed to View on every render.
+type Model struct {
+	AudioPath      string // audio file; only its base name is displayed
+	TranscriptPath string // transcript file; only its base name is displayed
+	IsTemp         bool   // when true, " (temp)" follows the transcript name
+	Width          int    // total width available to the header
+}
+
+// New creates a new header model
+func New() Model {
+	return Model{}
+}
+
+// SetPaths records the audio and transcript paths to display and whether
+// the transcript is a temporary file.
+func (m *Model) SetPaths(audioPath, transcriptPath string, isTemp bool) {
+	m.AudioPath, m.TranscriptPath = audioPath, transcriptPath
+	m.IsTemp = isTemp
+}
+
+// SetWidth sets the total width View uses when sizing the spacer between
+// the left and right halves of the header.
+func (m *Model) SetWidth(width int) {
+	m.Width = width
+}
+
+// formatDuration formats d as MM:SS after rounding to the nearest second.
+// The minutes field is not wrapped at 60, so an hour or more shows as a
+// larger minute count.
+func formatDuration(d time.Duration) string {
+	totalSeconds := int64(d.Round(time.Second) / time.Second)
+	return fmt.Sprintf("%02d:%02d", totalSeconds/60, totalSeconds%60)
+}
+
+// View renders the header as one row: title and file names on the left,
+// playback state and "position / duration" on the right, with a spacer of
+// at least one column between them.
+func (m Model) View(position, duration time.Duration, playing bool) string {
+	title := ui.HeaderStyle.Render("♪ Playback")
+
+	// File names only; the directories are dropped.
+	transcriptName := filepath.Base(m.TranscriptPath)
+	if m.IsTemp {
+		transcriptName += " (temp)"
+	}
+	fileInfo := ui.FilePathStyle.Render(
+		fmt.Sprintf("Audio: %s | Transcript: %s", filepath.Base(m.AudioPath), transcriptName),
+	)
+
+	// Playback state: label and colour both depend on playing.
+	status, statusColor := "⏸ Paused", ui.ColorMuted
+	if playing {
+		status, statusColor = "▶ Playing", ui.ColorSecondary
+	}
+	statusStyle := lipgloss.NewStyle().Foreground(statusColor)
+
+	clock := fmt.Sprintf("%s / %s", formatDuration(position), formatDuration(duration))
+	rightSide := lipgloss.JoinHorizontal(
+		lipgloss.Center,
+		statusStyle.Render(status),
+		"  ",
+		ui.BaseStyle.Render(clock),
+	)
+
+	// The spacer fills the width left over by both sides.
+	leftWidth := lipgloss.Width(title) + lipgloss.Width(fileInfo) + 2
+	gap := m.Width - leftWidth - lipgloss.Width(rightSide) - 4
+	if gap < 1 {
+		gap = 1
+	}
+	spacer := lipgloss.NewStyle().Width(gap).Render("")
+
+	return lipgloss.JoinHorizontal(lipgloss.Center, title, "  ", fileInfo, spacer, rightSide)
+}
--- a/internal/ui/styles.go
+++ b/internal/ui/styles.go
@@ -0,0 +1,106 @@
+package ui
+
+import "github.com/charmbracelet/lipgloss"
+
+// Color palette shared by all UI styles.
+var (
+	ColorPrimary    lipgloss.Color = "#7C3AED" // Purple
+	ColorSecondary  lipgloss.Color = "#10B981" // Green
+	ColorAccent     lipgloss.Color = "#F59E0B" // Amber
+	ColorMuted      lipgloss.Color = "#6B7280" // Gray
+	ColorBackground lipgloss.Color = "#1F2937" // Dark gray
+	ColorForeground lipgloss.Color = "#F9FAFB" // Light gray
+	ColorHighlight  lipgloss.Color = "#374151" // Medium gray
+	ColorError      lipgloss.Color = "#EF4444" // Red
+)
+
+// Shared lipgloss styles, grouped by the component that uses them.
+var (
+	// Base text.
+	BaseStyle = lipgloss.NewStyle().Foreground(ColorForeground)
+
+	// Header.
+	HeaderStyle   = lipgloss.NewStyle().Bold(true).Foreground(ColorPrimary).Padding(0, 1)
+	FilePathStyle = lipgloss.NewStyle().Foreground(ColorMuted).Italic(true)
+
+	// Waveform panel and its playback needle.
+	WaveformStyle        = panelStyle(ColorMuted)
+	WaveformFocusedStyle = panelStyle(ColorPrimary)
+	NeedleStyle          = lipgloss.NewStyle().Foreground(ColorAccent).Bold(true)
+
+	// Transcript panel.
+	TranscriptStyle        = panelStyle(ColorMuted)
+	TranscriptFocusedStyle = panelStyle(ColorPrimary)
+
+	// Transcript cues and their timestamps.
+	CurrentCueStyle = lipgloss.NewStyle().
+			Background(ColorHighlight).
+			Foreground(ColorSecondary).
+			Bold(true)
+	SelectedCueStyle       = lipgloss.NewStyle().Foreground(ColorAccent).Bold(true)
+	TimestampStyle         = lipgloss.NewStyle().Foreground(ColorMuted)
+	SelectedTimestampStyle = lipgloss.NewStyle().Foreground(ColorAccent)
+
+	// Status bar.
+	StatusBarStyle  = lipgloss.NewStyle().Foreground(ColorMuted).Padding(0, 1)
+	ModeStyle       = badgeStyle(ColorPrimary)
+	InsertModeStyle = badgeStyle(ColorSecondary)
+	CommandStyle    = lipgloss.NewStyle().Foreground(ColorAccent)
+
+	// Help.
+	HelpKeyStyle  = lipgloss.NewStyle().Foreground(ColorSecondary).Bold(true)
+	HelpDescStyle = lipgloss.NewStyle().Foreground(ColorMuted)
+
+	// Errors.
+	ErrorStyle = lipgloss.NewStyle().Foreground(ColorError).Bold(true)
+)
+
+// panelStyle builds the rounded-border box with one column of horizontal
+// padding used by the waveform and transcript panels; only the border color
+// differs between the focused and unfocused variants.
+func panelStyle(border lipgloss.Color) lipgloss.Style {
+	return lipgloss.NewStyle().
+		Border(lipgloss.RoundedBorder()).
+		BorderForeground(border).
+		Padding(0, 1)
+}
+
+// badgeStyle builds the bold, padded label used for mode indicators; only the
+// background color differs between modes.
+func badgeStyle(background lipgloss.Color) lipgloss.Style {
+	return lipgloss.NewStyle().
+		Background(background).
+		Foreground(ColorForeground).
+		Padding(0, 1).
+		Bold(true)
+}
--- a/internal/ui/transcript/highlight.go
+++ b/internal/ui/transcript/highlight.go
@@ -0,0 +1,63 @@
+package transcript
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/charmbracelet/lipgloss"
+	"playback/internal/srt"
+	"playback/internal/ui"
+)
+
+// RenderCue renders one cue as its timestamp range on the first line followed
+// by its text, each introduced by a two-character gutter and terminated by a
+// newline. A selected cue gets a "> " gutter and the selection styles;
+// otherwise the cue at the playback position gets the current-cue text style,
+// and every other cue the base style. The width argument is not used.
+func RenderCue(cue *srt.Cue, isCurrent, isSelected bool, width int) string {
+	// Defaults describe an ordinary cue; selection takes priority over
+	// "current" when a cue is both.
+	textStyle, timestampStyle := ui.BaseStyle, ui.TimestampStyle
+	gutter := "  "
+	switch {
+	case isSelected:
+		textStyle, timestampStyle = ui.SelectedCueStyle, ui.SelectedTimestampStyle
+		gutter = "> "
+	case isCurrent:
+		textStyle = ui.CurrentCueStyle
+	}
+
+	stamp := timestampStyle.Render(formatTimestamp(cue.Start, cue.End))
+	body := textStyle.Render(cue.Text)
+
+	return fmt.Sprintf("%s%s\n%s%s\n", gutter, stamp, gutter, body)
+}
+
+// formatTimestamp joins the formatted start and end times with the SRT
+// range separator, e.g. "00:00:01,000 --> 00:00:02,500".
+func formatTimestamp(start, end time.Duration) string {
+	from, to := formatTime(start), formatTime(end)
+	return fmt.Sprintf("%s --> %s", from, to)
+}
+
+// formatTime renders d as HH:MM:SS,mmm. Sub-millisecond precision is
+// truncated, and hours are not wrapped at 24.
+func formatTime(d time.Duration) string {
+	hours := d / time.Hour
+	minutes := d % time.Hour / time.Minute
+	seconds := d % time.Minute / time.Second
+	millis := d % time.Second / time.Millisecond
+
+	return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, seconds, millis)
+}
--- a/internal/ui/transcript/transcript.go
+++ b/internal/ui/transcript/transcript.go
@@ -0,0 +1,233 @@
+package transcript
+
+import (
+	"strings"
+	"time"
+
+	"github.com/charmbracelet/bubbles/viewport"
+	tea "github.com/charmbracelet/bubbletea"
+	"github.com/charmbracelet/lipgloss"
+	"playback/internal/srt"
+	"playback/internal/ui"
+)
+
+// SeekToCueMsg is sent when the user wants to seek to a specific cue.
+// The transcript's Update produces it in response to the Enter key.
+type SeekToCueMsg struct {
+	// Position is the start time of the cue to seek to.
+	Position time.Duration
+}
+
+// Model represents the transcript view component: a scrollable list of cues
+// that tracks both the cue at the playback position and the cue the user has
+// selected with the keyboard.
+type Model struct {
+	// viewport holds the rendered cue text and the scroll offset.
+	viewport    viewport.Model
+	// transcript is the data being displayed; nil until SetTranscript is called.
+	transcript  *srt.Transcript
+	currentCue  int   // Cue currently playing (from playback position); New starts it at -1
+	selectedCue int   // Cue selected by user navigation
+	cueLines    []int // Starting line number (in rendered view) for each cue
+	// Width and Height are the outer dimensions passed to SetSize.
+	Width       int
+	Height      int
+	// Focused selects the focused border style and disables auto-scroll
+	// in SetPosition.
+	Focused     bool
+}
+
+// New returns a transcript model with no transcript loaded, no cue marked as
+// playing (currentCue is -1) and the selection on the first cue.
+func New() Model {
+	// 80x20 is only a starting size; SetSize overwrites the viewport
+	// dimensions.
+	m := Model{
+		viewport:   viewport.New(80, 20),
+		currentCue: -1,
+	}
+	m.viewport.Style = lipgloss.NewStyle()
+	return m
+}
+
+// SetTranscript replaces the displayed transcript, moves the selection back
+// to the first cue, re-renders the content and scrolls to the top cue.
+func (m *Model) SetTranscript(t *srt.Transcript) {
+	m.transcript, m.selectedCue = t, 0
+	m.updateContent()
+	m.scrollToCue(m.selectedCue)
+}
+
+// Transcript returns the transcript being displayed, or nil when none has
+// been set.
+func (m *Model) Transcript() *srt.Transcript {
+	return m.transcript
+}
+
+// SelectedCueLineNumber returns the LineNumber recorded on the selected cue,
+// intended for opening vim at that cue. It falls back to 1 when no transcript
+// is loaded or the selection is out of range.
+func (m *Model) SelectedCueLineNumber() int {
+	if t := m.transcript; t != nil && m.selectedCue >= 0 && m.selectedCue < len(t.Cues) {
+		return t.Cues[m.selectedCue].LineNumber
+	}
+	return 1
+}
+
+// SetPosition moves the "currently playing" highlight to the cue that
+// contains pos. The content is re-rendered only when that cue changes.
+func (m *Model) SetPosition(pos time.Duration) {
+	if m.transcript == nil {
+		return
+	}
+
+	idx := m.transcript.CueIndexAt(pos)
+	if idx == m.currentCue {
+		return // highlight is already on the right cue
+	}
+
+	m.currentCue = idx
+	m.updateContent()
+
+	// Follow playback only while unfocused, so a focused user can browse
+	// without the view jumping away from their selection.
+	if idx >= 0 && !m.Focused {
+		m.scrollToCue(idx)
+	}
+}
+
+// SetSize stores the component's outer dimensions and resizes the inner
+// viewport to fit inside the frame, then re-renders the content.
+//
+// The viewport is 4 columns narrower (left/right border plus one column of
+// padding on each side) and 2 rows shorter (top/bottom border) than the outer
+// size. Both inner dimensions are clamped at zero so a very small terminal
+// never hands the viewport a negative size, which would also skew the
+// centering arithmetic in scrollToCue.
+func (m *Model) SetSize(width, height int) {
+	m.Width = width
+	m.Height = height
+
+	innerWidth, innerHeight := width-4, height-2
+	if innerWidth < 0 {
+		innerWidth = 0
+	}
+	if innerHeight < 0 {
+		innerHeight = 0
+	}
+	m.viewport.Width = innerWidth
+	m.viewport.Height = innerHeight
+
+	m.updateContent()
+}
+
+// SetFocused records whether the transcript has keyboard focus. On gaining
+// focus, if a cue is currently playing, the selection jumps to that cue and
+// the view scrolls to it.
+func (m *Model) SetFocused(focused bool) {
+	m.Focused = focused
+	if !focused || m.currentCue < 0 {
+		return
+	}
+
+	m.selectedCue = m.currentCue
+	m.updateContent()
+	m.scrollToCue(m.selectedCue)
+}
+
+// ModeString returns the label for the transcript's mode. The transcript
+// only has a single mode, so this is always "VIEW".
+func (m *Model) ModeString() string {
+	const viewMode = "VIEW"
+	return viewMode
+}
+
+// Update handles key presses that move the cue selection or request a seek.
+//
+// Key bindings:
+//
+//	j / down   select the next cue
+//	k / up     select the previous cue
+//	ctrl+d     jump 5 cues down (clamped to the last cue)
+//	ctrl+u     jump 5 cues up (clamped to the first cue)
+//	g / G      select the first / last cue
+//	enter      return a command that yields SeekToCueMsg for the selection
+//
+// Messages other than tea.KeyMsg are ignored, as are all keys while no
+// transcript is loaded or the transcript has no cues. The returned command is
+// nil for everything except a successful "enter".
+func (m *Model) Update(msg tea.Msg) tea.Cmd {
+	if m.transcript == nil {
+		return nil
+	}
+	keyMsg, ok := msg.(tea.KeyMsg)
+	if !ok {
+		return nil
+	}
+
+	// With no cues there is nothing to select. Returning here keeps the
+	// jump keys ("G", "ctrl+d") from driving selectedCue to -1.
+	last := len(m.transcript.Cues) - 1
+	if last < 0 {
+		return nil
+	}
+
+	switch keyMsg.String() {
+	case "j", "down":
+		if m.selectedCue < last {
+			m.selectedCue++
+			m.refreshAndScroll()
+		}
+	case "k", "up":
+		if m.selectedCue > 0 {
+			m.selectedCue--
+			m.refreshAndScroll()
+		}
+	case "ctrl+d":
+		m.selectedCue += 5
+		if m.selectedCue > last {
+			m.selectedCue = last
+		}
+		m.refreshAndScroll()
+	case "ctrl+u":
+		m.selectedCue -= 5
+		if m.selectedCue < 0 {
+			m.selectedCue = 0
+		}
+		m.refreshAndScroll()
+	case "g":
+		m.selectedCue = 0
+		m.refreshAndScroll()
+	case "G":
+		m.selectedCue = last
+		m.refreshAndScroll()
+	case "enter":
+		if m.selectedCue < 0 || m.selectedCue > last {
+			return nil
+		}
+		// Resolve the target now rather than inside the closure: the
+		// command is executed later, and by then the selection or the
+		// transcript may have changed.
+		seek := SeekToCueMsg{Position: m.transcript.Cues[m.selectedCue].Start}
+		return func() tea.Msg { return seek }
+	}
+
+	return nil
+}
+
+// refreshAndScroll re-renders the cue list so the selection marker moves,
+// then scrolls the viewport to the selected cue.
+func (m *Model) refreshAndScroll() {
+	m.updateContent()
+	m.scrollToCue(m.selectedCue)
+}
+
+// updateContent re-renders every cue into the viewport and records, in
+// cueLines, the line at which each cue starts so scrollToCue can find it.
+// Cues are separated by one blank line. With no transcript loaded it shows a
+// placeholder message and discards any previously recorded line offsets.
+func (m *Model) updateContent() {
+	if m.transcript == nil {
+		m.cueLines = nil // offsets from an earlier transcript no longer apply
+		m.viewport.SetContent("No transcript loaded")
+		return
+	}
+
+	cues := m.transcript.Cues
+	m.cueLines = make([]int, len(cues))
+
+	var sb strings.Builder
+	line := 0
+	// Index the slice instead of ranging by value: this avoids copying each
+	// cue and avoids taking the address of the loop variable.
+	for i := range cues {
+		m.cueLines[i] = line
+
+		rendered := RenderCue(&cues[i], i == m.currentCue, i == m.selectedCue, m.Width-4)
+		sb.WriteString(rendered)
+
+		// A cue may span more than two lines, so count what was
+		// actually rendered rather than assuming a fixed height.
+		line += strings.Count(rendered, "\n")
+
+		if i < len(cues)-1 {
+			sb.WriteString("\n")
+			line++ // blank line between cues
+		}
+	}
+
+	m.viewport.SetContent(sb.String())
+}
+
+// scrollToCue scrolls the viewport so the given cue's first line sits about
+// half a viewport below the top, i.e. roughly centered. Indices without a
+// recorded line offset, and calls made while no transcript is loaded, are
+// ignored.
+func (m *Model) scrollToCue(cueIndex int) {
+	if m.transcript == nil || cueIndex < 0 || cueIndex >= len(m.cueLines) {
+		return
+	}
+
+	// Cues near the top cannot be centered; pin the view to the first line.
+	top := m.cueLines[cueIndex] - m.viewport.Height/2
+	if top < 0 {
+		top = 0
+	}
+	m.viewport.SetYOffset(top)
+}
+
+// View renders the viewport inside the transcript frame, using the focused
+// border style when the component has focus.
+func (m Model) View() string {
+	frame := ui.TranscriptStyle
+	if m.Focused {
+		frame = ui.TranscriptFocusedStyle
+	}
+
+	return frame.
+		Width(m.Width - 2).
+		Height(m.Height - 2).
+		Render(m.viewport.View())
+}
--- a/internal/ui/waveform/render.go
+++ b/internal/ui/waveform/render.go
@@ -0,0 +1,77 @@
+package waveform
+
+// Block characters for waveform rendering (bottom to top)
+var blocks = []rune{' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'}
+
+// RenderWaveform converts normalized samples (0-1) into a string of exactly
+// width block characters, one per column. Each column shows the sample at the
+// proportional position in samples, so the input is stretched or decimated to
+// fit.
+//
+// It returns "" when samples is empty or width is not positive. Samples
+// outside [0, 1] are clamped, and NaN samples are drawn as silence.
+func RenderWaveform(samples []float64, width int) string {
+	if len(samples) == 0 || width <= 0 {
+		return ""
+	}
+
+	top := len(blocks) - 1
+	result := make([]rune, width)
+
+	for i := range result {
+		// i < width, so the mapped index is always < len(samples).
+		value := samples[i*len(samples)/width]
+
+		// Both comparisons are false for NaN, which therefore falls
+		// through to level 0 instead of reaching an undefined
+		// float-to-int conversion.
+		level := 0
+		switch {
+		case value >= 1:
+			level = top
+		case value > 0:
+			level = int(value * float64(top))
+		}
+
+		result[i] = blocks[level]
+	}
+
+	return string(result)
+}
+
+// RenderWaveformWithNeedle renders the waveform and reports the column at
+// which the position indicator belongs.
+//
+// position is a fraction of the total width (0.0 to 1.0); values outside that
+// range, including NaN, are clamped. The returned column is always in
+// [0, width-1]. When width is not positive there are no columns, so it
+// returns "" and 0 rather than a negative index.
+func RenderWaveformWithNeedle(samples []float64, width int, position float64) (string, int) {
+	if width <= 0 {
+		return "", 0
+	}
+
+	waveform := RenderWaveform(samples, width)
+
+	// Handle the out-of-range cases before converting to int: converting
+	// NaN or an infinite value has no defined result. NaN fails both
+	// comparisons and keeps the needle at column 0.
+	needlePos := 0
+	switch {
+	case position >= 1:
+		needlePos = width - 1
+	case position > 0:
+		needlePos = int(position * float64(width))
+		if needlePos >= width {
+			needlePos = width - 1
+		}
+	}
+
+	return waveform, needlePos
+}
+
+// RenderWithColors splits the rendered waveform around the needle so the
+// caller can style each piece separately.
+// Returns: the columns left of the needle, the needle character, and the
+// columns right of it. The needle replaces the column it lands on. When
+// nothing is rendered, both sides are empty and only the needle is returned.
+func RenderWithColors(samples []float64, width int, position float64) (string, string, string) {
+	const needle = "|"
+
+	cells := []rune(RenderWaveform(samples, width))
+	n := len(cells)
+	if n == 0 {
+		return "", needle, ""
+	}
+
+	idx := int(position * float64(n))
+	switch {
+	case idx < 0:
+		idx = 0
+	case idx >= n:
+		idx = n - 1
+	}
+
+	// idx+1 is at most n, so the right-hand slice is always valid and is
+	// simply empty when the needle sits on the last column.
+	return string(cells[:idx]), needle, string(cells[idx+1:])
+}
--- a/internal/ui/waveform/waveform.go
+++ b/internal/ui/waveform/waveform.go
@@ -0,0 +1,133 @@
+package waveform
+
+import (
+	"time"
+
+	"github.com/charmbracelet/lipgloss"
+
+	"playback/internal/audio"
+	"playback/internal/ui"
+)
+
+// Model represents the waveform visualization component: a one-line block
+// character waveform with a playback needle and a row of time markers.
+type Model struct {
+	// Width is the outer width in columns; View renders nothing when fewer
+	// than 10 columns remain after subtracting 4 for border and padding.
+	Width    int
+	// Height is stored by SetSize (New sets it to 3); View does not read it.
+	Height   int
+	// Focused selects the focused border style.
+	Focused  bool
+	// Samples holds the values returned by WaveformData.Normalized.
+	Samples  []float64
+	Position float64 // 0.0 to 1.0
+	// Duration is the total length used for the end and current time labels.
+	Duration time.Duration
+}
+
+// New returns a waveform model with a height of 3 and every other field at
+// its zero value.
+func New() Model {
+	var m Model
+	m.Height = 3
+	return m
+}
+
+// SetSamples replaces the displayed samples with data's normalized values.
+// A nil data leaves the current samples unchanged.
+func (m *Model) SetSamples(data *audio.WaveformData) {
+	if data == nil {
+		return
+	}
+	m.Samples = data.Normalized()
+}
+
+// SetPosition sets the playback position as a fraction of the total
+// duration (0.0 to 1.0).
+//
+// Values outside that range are clamped and NaN is stored as 0, so View never
+// converts an out-of-range or undefined value to a column index or a time
+// label. NOTE(review): a NaN can plausibly arise if the caller divides by a
+// zero duration before audio is loaded; confirm against the caller.
+func (m *Model) SetPosition(pos float64) {
+	switch {
+	case pos >= 1:
+		pos = 1
+	case pos > 0:
+		// already within range
+	default:
+		pos = 0 // negative or NaN
+	}
+	m.Position = pos
+}
+
+// SetDuration stores the total audio duration, which View uses for the end
+// and current time labels.
+func (m *Model) SetDuration(d time.Duration) {
+	m.Duration = d
+}
+
+// SetSize stores the component's outer width and height.
+func (m *Model) SetSize(width, height int) {
+	m.Width, m.Height = width, height
+}
+
+// SetFocused records whether the waveform has keyboard focus, which selects
+// the border style used by View.
+func (m *Model) SetFocused(focused bool) {
+	m.Focused = focused
+}
+
+// View renders the waveform panel: one row of block characters with the
+// playback needle, and below it a row of time markers (start, current, end),
+// all wrapped in a border whose color reflects focus.
+//
+// It returns "" when fewer than 10 columns of content would fit. The current
+// time label is centered under the needle where possible, but is kept between
+// the start and end labels so the marker row does not grow wider than the
+// waveform row near the beginning or end of playback.
+func (m Model) View() string {
+	contentWidth := m.Width - 4 // Account for border and padding
+	if contentWidth < 10 {
+		return ""
+	}
+
+	// Waveform row: the needle replaces the column at the playback position.
+	left, needle, right := RenderWithColors(m.Samples, contentWidth, m.Position)
+	waveformLine := lipgloss.JoinHorizontal(
+		lipgloss.Left,
+		ui.BaseStyle.Render(left),
+		ui.NeedleStyle.Render(needle),
+		ui.BaseStyle.Render(right),
+	)
+
+	// Time markers. The labels are plain ASCII, so len() equals their
+	// display width.
+	startTime := "00:00"
+	endTime := formatDuration(m.Duration)
+	currentTime := formatDuration(time.Duration(m.Position * float64(m.Duration)))
+
+	// Columns left over for spacing once all three labels are placed.
+	free := contentWidth - len(startTime) - len(currentTime) - len(endTime)
+	if free < 0 {
+		free = 0
+	}
+
+	// Center the current time under the needle, then clamp to the free
+	// space so the label cannot push the end marker past contentWidth.
+	spaceBefore := int(m.Position*float64(contentWidth)) - len(startTime) - len(currentTime)/2
+	if spaceBefore > free {
+		spaceBefore = free
+	}
+	if spaceBefore < 0 {
+		spaceBefore = 0
+	}
+	spaceAfter := free - spaceBefore
+
+	timeLine := lipgloss.JoinHorizontal(
+		lipgloss.Left,
+		ui.TimestampStyle.Render(startTime),
+		lipgloss.NewStyle().Width(spaceBefore).Render(""),
+		ui.NeedleStyle.Render(currentTime),
+		lipgloss.NewStyle().Width(spaceAfter).Render(""),
+		ui.TimestampStyle.Render(endTime),
+	)
+
+	content := lipgloss.JoinVertical(
+		lipgloss.Left,
+		waveformLine,
+		timeLine,
+	)
+
+	// Apply border style based on focus
+	style := ui.WaveformStyle
+	if m.Focused {
+		style = ui.WaveformFocusedStyle
+	}
+
+	return style.Width(m.Width - 2).Render(content)
+}
+
+// formatDuration renders d, rounded to the nearest second, as MM:SS.
+//
+// Minutes always use at least two digits and grow as needed, so 100 minutes
+// is "100:00". Negative durations are rendered as "00:00". The digits are
+// assembled by hand so this file needs no extra imports.
+func formatDuration(d time.Duration) string {
+	if d < 0 {
+		d = 0
+	}
+	total := int64(d.Round(time.Second) / time.Second)
+	mins, secs := total/60, total%60
+
+	// Fill the buffer from the right: seconds, the separator, then as many
+	// minute digits as the value needs (never fewer than two).
+	var buf [24]byte
+	pos := len(buf)
+	put := func(b byte) {
+		pos--
+		buf[pos] = b
+	}
+
+	put(byte('0' + secs%10))
+	put(byte('0' + secs/10))
+	put(':')
+	for n, digits := mins, 0; n > 0 || digits < 2; n, digits = n/10, digits+1 {
+		put(byte('0' + n%10))
+	}
+
+	return string(buf[pos:])
+}