Files
gateway-telegram/internal/infrastructure/speech/audio_converter.go

64 lines
1.2 KiB
Go

package speech
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"github.com/google/uuid"
)
// FFmpegConverter converts audio by running an external ffmpeg binary,
// staging the input and output as files inside a temporary directory.
type FFmpegConverter struct {
	// ffmpegPath is the executable passed to exec.CommandContext;
	// tempDir is the directory in which the staging files are created.
	ffmpegPath, tempDir string
}
// NewFFmpegConverter returns a converter that runs the binary at ffmpegPath
// and stages its files in tempDir. An empty ffmpegPath falls back to
// "ffmpeg"; an empty tempDir falls back to os.TempDir().
func NewFFmpegConverter(ffmpegPath, tempDir string) *FFmpegConverter {
	conv := &FFmpegConverter{ffmpegPath: "ffmpeg", tempDir: tempDir}
	if ffmpegPath != "" {
		conv.ffmpegPath = ffmpegPath
	}
	if conv.tempDir == "" {
		conv.tempDir = os.TempDir()
	}
	return conv
}
// Convert transcodes input to 16 kHz mono WAV by running ffmpeg on temporary
// files under c.tempDir, and returns the bytes of the converted file.
//
// fromMime and toMime are currently ignored: no input format is passed to
// ffmpeg and the output is always written as WAV.
//
// Empty input is rejected before anything touches the disk. If ffmpeg fails
// and ctx is already done, the returned error wraps ctx.Err() so callers can
// match it with errors.Is; otherwise it wraps the error from running ffmpeg
// and includes ffmpeg's stderr output. Both temporary files are removed on
// a best-effort basis before Convert returns.
func (c *FFmpegConverter) Convert(ctx context.Context, input []byte, fromMime, toMime string) ([]byte, error) {
	if len(input) == 0 {
		// ffmpeg cannot decode an empty file; fail fast instead of
		// spawning a process that is certain to error out.
		return nil, fmt.Errorf("ffmpeg conversion: empty input")
	}

	// A fresh UUID per call keeps concurrent conversions from colliding
	// on file names inside the shared temp directory.
	id := uuid.New().String()
	inFile := filepath.Join(c.tempDir, id+".input")
	outFile := filepath.Join(c.tempDir, id+".wav")

	// Registered before the files exist so that a partially written input
	// or output is cleaned up too; removal errors are deliberately ignored.
	defer os.Remove(inFile)
	defer os.Remove(outFile)

	if err := os.WriteFile(inFile, input, 0600); err != nil {
		return nil, fmt.Errorf("write temp input: %w", err)
	}

	cmd := exec.CommandContext(ctx, c.ffmpegPath,
		"-i", inFile,
		"-ar", "16000", // output sample rate
		"-ac", "1", // single audio channel
		"-f", "wav",
		"-y", // overwrite the output file without prompting
		outFile,
	)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// When the context ends, the process is killed and Run reports
		// only the exit status. Surface the context error instead so
		// cancellation and timeouts are distinguishable from ffmpeg
		// failures.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("ffmpeg conversion: %w: %v", ctxErr, err)
		}
		return nil, fmt.Errorf("ffmpeg conversion: %w: %s", err, stderr.String())
	}

	out, err := os.ReadFile(outFile)
	if err != nil {
		return nil, fmt.Errorf("read converted file: %w", err)
	}
	return out, nil
}