package speech

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"

	"github.com/google/uuid"
)

// FFmpegConverter converts audio by invoking an external ffmpeg binary and
// exchanging data with it through temporary files.
type FFmpegConverter struct {
	ffmpegPath string // executable name or path handed to os/exec
	tempDir    string // directory in which per-call scratch files are created
}

// NewFFmpegConverter returns a converter that runs the ffmpeg executable at
// ffmpegPath and keeps its scratch files in tempDir.
//
// An empty ffmpegPath defaults to "ffmpeg" (resolved by os/exec), and an
// empty tempDir defaults to os.TempDir().
func NewFFmpegConverter(ffmpegPath, tempDir string) *FFmpegConverter {
	if ffmpegPath == "" {
		ffmpegPath = "ffmpeg"
	}
	if tempDir == "" {
		tempDir = os.TempDir()
	}
	return &FFmpegConverter{ffmpegPath: ffmpegPath, tempDir: tempDir}
}

// Convert transcodes input to WAV audio resampled to 16 kHz, mono, and
// returns the encoded bytes.
//
// The input is written to a uniquely named temporary file, ffmpeg is run on
// it, and the resulting file is read back; both files are removed before
// Convert returns.
//
// NOTE: fromMime and toMime are currently not consulted. ffmpeg probes the
// input container itself and the output is always WAV regardless of toMime.
//
// Convert returns an error if input is empty, if ctx is already done, if a
// temporary file cannot be written or read, or if ffmpeg fails. When ffmpeg
// is interrupted because ctx was cancelled or timed out, the returned error
// wraps ctx.Err() so callers can test it with errors.Is.
func (c *FFmpegConverter) Convert(ctx context.Context, input []byte, fromMime, toMime string) ([]byte, error) {
	// Fail fast instead of spawning a process that cannot succeed.
	if len(input) == 0 {
		return nil, errors.New("ffmpeg conversion: empty input")
	}
	if err := ctx.Err(); err != nil {
		return nil, fmt.Errorf("ffmpeg conversion: %w", err)
	}

	id := uuid.New().String()
	inFile := filepath.Join(c.tempDir, id+".input")
	outFile := filepath.Join(c.tempDir, id+".wav")
	// Best-effort cleanup: either file may not exist if an earlier step
	// failed, so removal errors are deliberately ignored.
	defer os.Remove(inFile)
	defer os.Remove(outFile)

	if err := os.WriteFile(inFile, input, 0600); err != nil {
		return nil, fmt.Errorf("write temp input: %w", err)
	}

	cmd := exec.CommandContext(ctx, c.ffmpegPath,
		"-hide_banner", // keep the version banner out of captured stderr
		"-nostdin",     // never wait for interactive input
		"-i", inFile,
		"-ar", "16000",
		"-ac", "1",
		"-f", "wav",
		"-y", outFile,
	)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// A process killed by context cancellation surfaces as an exit
		// error; report the context error so callers can detect it.
		if ctxErr := ctx.Err(); ctxErr != nil && !errors.Is(err, ctxErr) {
			return nil, fmt.Errorf("ffmpeg conversion: %w (%v)", ctxErr, err)
		}
		return nil, fmt.Errorf("ffmpeg conversion: %w: %s", err, bytes.TrimSpace(stderr.Bytes()))
	}

	out, err := os.ReadFile(outFile)
	if err != nil {
		return nil, fmt.Errorf("read converted file: %w", err)
	}
	return out, nil
}