package speech

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"

	"github.com/google/uuid"
)

// FFmpegConverter converts audio by shelling out to an ffmpeg executable,
// staging its data as files inside a temp directory.
type FFmpegConverter struct {
	ffmpegPath string // executable to invoke
	tempDir    string // directory that holds the staged input/output files
}

// NewFFmpegConverter returns a converter that runs the executable at
// ffmpegPath and stages files in tempDir. An empty ffmpegPath falls back to
// "ffmpeg"; an empty tempDir falls back to os.TempDir().
func NewFFmpegConverter(ffmpegPath, tempDir string) *FFmpegConverter {
	conv := &FFmpegConverter{ffmpegPath: "ffmpeg", tempDir: tempDir}
	if ffmpegPath != "" {
		conv.ffmpegPath = ffmpegPath
	}
	if conv.tempDir == "" {
		conv.tempDir = os.TempDir()
	}
	return conv
}

// Convert writes input to a temp file, runs it through runFFmpeg, and returns
// the bytes of the resulting output file. Both temp files are removed before
// Convert returns — use ConvertFile when the output must stay on disk.
//
// NOTE: fromMime and toMime are accepted but never read by this
// implementation; the output file is always created with a ".wav" extension.
func (c *FFmpegConverter) Convert(ctx context.Context, input []byte, fromMime, toMime string) ([]byte, error) {
	base := filepath.Join(c.tempDir, uuid.New().String())
	srcPath, dstPath := base+".input", base+".wav"

	// Best-effort cleanup on every exit path; removal errors are ignored
	// because either file may not exist if an earlier step failed.
	defer func() {
		_ = os.Remove(dstPath)
		_ = os.Remove(srcPath)
	}()

	if err := os.WriteFile(srcPath, input, 0600); err != nil {
		return nil, fmt.Errorf("write temp input: %w", err)
	}

	if err := c.runFFmpeg(ctx, srcPath, dstPath); err != nil {
		return nil, err
	}

	converted, err := os.ReadFile(dstPath)
	if err != nil {
		return nil, fmt.Errorf("read converted file: %w", err)
	}
	return converted, nil
}

// ConvertFile converts audio bytes and writes the WAV output to outPath.
// The caller is responsible for deleting outPath when no longer needed.
func (c *FFmpegConverter) ConvertFile(ctx context.Context, input []byte, outPath string) error { id := uuid.New().String() inFile := filepath.Join(c.tempDir, id+".input") defer os.Remove(inFile) if err := os.WriteFile(inFile, input, 0600); err != nil { return fmt.Errorf("write temp input: %w", err) } return c.runFFmpeg(ctx, inFile, outPath) } func (c *FFmpegConverter) runFFmpeg(ctx context.Context, inFile, outFile string) error { cmd := exec.CommandContext(ctx, c.ffmpegPath, "-i", inFile, "-ar", "16000", "-ac", "1", "-f", "wav", "-y", outFile, ) var stderr bytes.Buffer cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return fmt.Errorf("ffmpeg conversion: %w: %s", err, stderr.String()) } return nil }