feat: initial commit
This commit is contained in:
63
internal/infrastructure/speech/audio_converter.go
Normal file
63
internal/infrastructure/speech/audio_converter.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package speech
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// FFmpegConverter converts audio by running an external ffmpeg executable
// against temporary files on disk.
type FFmpegConverter struct {
	// ffmpegPath is the executable handed to exec when converting;
	// tempDir is the directory in which the per-conversion scratch files
	// are created.
	ffmpegPath, tempDir string
}
|
||||
|
||||
func NewFFmpegConverter(ffmpegPath, tempDir string) *FFmpegConverter {
|
||||
if ffmpegPath == "" {
|
||||
ffmpegPath = "ffmpeg"
|
||||
}
|
||||
if tempDir == "" {
|
||||
tempDir = os.TempDir()
|
||||
}
|
||||
return &FFmpegConverter{ffmpegPath: ffmpegPath, tempDir: tempDir}
|
||||
}
|
||||
|
||||
func (c *FFmpegConverter) Convert(ctx context.Context, input []byte, fromMime, toMime string) ([]byte, error) {
|
||||
id := uuid.New().String()
|
||||
inFile := filepath.Join(c.tempDir, id+".input")
|
||||
outFile := filepath.Join(c.tempDir, id+".wav")
|
||||
|
||||
defer os.Remove(inFile)
|
||||
defer os.Remove(outFile)
|
||||
|
||||
if err := os.WriteFile(inFile, input, 0600); err != nil {
|
||||
return nil, fmt.Errorf("write temp input: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, c.ffmpegPath,
|
||||
"-i", inFile,
|
||||
"-ar", "16000",
|
||||
"-ac", "1",
|
||||
"-f", "wav",
|
||||
"-y",
|
||||
outFile,
|
||||
)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return nil, fmt.Errorf("ffmpeg conversion: %w: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
out, err := os.ReadFile(outFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read converted file: %w", err)
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
89
internal/infrastructure/speech/openai_whisper.go
Normal file
89
internal/infrastructure/speech/openai_whisper.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package speech
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// whisperAPIURL is the OpenAI audio transcription endpoint that Transcribe
// sends its POST request to.
const whisperAPIURL = "https://api.openai.com/v1/audio/transcriptions"
|
||||
|
||||
// OpenAIWhisper transcribes audio by posting it to the OpenAI transcription
// HTTP API.
type OpenAIWhisper struct {
	// apiKey is sent as the bearer token of each request; model and language
	// are sent as form fields (language only when it is non-empty).
	apiKey, model, language string

	// client performs the HTTP request.
	client *http.Client
}
|
||||
|
||||
func NewOpenAIWhisper(apiKey, model, language string) *OpenAIWhisper {
|
||||
return &OpenAIWhisper{
|
||||
apiKey: apiKey,
|
||||
model: model,
|
||||
language: language,
|
||||
client: &http.Client{Timeout: 60 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// whisperResponse holds the part of the transcription API's JSON reply that
// Transcribe reads.
type whisperResponse struct {
	// Text is decoded from the "text" key of the response body.
	Text string `json:"text"`
}
|
||||
|
||||
func (w *OpenAIWhisper) Transcribe(ctx context.Context, audioData []byte, mimeType string) (string, error) {
|
||||
var buf bytes.Buffer
|
||||
mw := multipart.NewWriter(&buf)
|
||||
|
||||
fw, err := mw.CreateFormFile("file", "audio.wav")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create form file: %w", err)
|
||||
}
|
||||
if _, err := fw.Write(audioData); err != nil {
|
||||
return "", fmt.Errorf("write audio data: %w", err)
|
||||
}
|
||||
|
||||
if err := mw.WriteField("model", w.model); err != nil {
|
||||
return "", fmt.Errorf("write model field: %w", err)
|
||||
}
|
||||
if err := mw.WriteField("response_format", "json"); err != nil {
|
||||
return "", fmt.Errorf("write response_format: %w", err)
|
||||
}
|
||||
if w.language != "" {
|
||||
if err := mw.WriteField("language", w.language); err != nil {
|
||||
return "", fmt.Errorf("write language field: %w", err)
|
||||
}
|
||||
}
|
||||
mw.Close()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, whisperAPIURL, &buf)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create whisper request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+w.apiKey)
|
||||
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
|
||||
resp, err := w.client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("whisper API call: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read whisper response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("whisper API error %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var result whisperResponse
|
||||
if err := json.Unmarshal(body, &result); err != nil {
|
||||
return "", fmt.Errorf("parse whisper response: %w", err)
|
||||
}
|
||||
|
||||
return result.Text, nil
|
||||
}
|
||||
Reference in New Issue
Block a user