Files
gateway-telegram/internal/infrastructure/speech/openai_whisper.go

90 lines
2.1 KiB
Go

package speech
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"strings"
	"time"
)
// whisperAPIURL is the OpenAI audio transcription endpoint that Transcribe
// sends its POST request to.
const whisperAPIURL = "https://api.openai.com/v1/audio/transcriptions"
// OpenAIWhisper is a speech-to-text client for the OpenAI transcription
// endpoint.
//
// apiKey is sent as the Bearer token of the Authorization header, model is
// sent as the "model" form field, and language is sent as the "language"
// form field when it is non-empty. client performs the HTTP request.
type OpenAIWhisper struct {
	apiKey, model, language string

	client *http.Client
}
// NewOpenAIWhisper returns an OpenAIWhisper configured with the given API
// key, model name and language. The returned value owns a dedicated
// http.Client whose overall request timeout is 60 seconds.
func NewOpenAIWhisper(apiKey, model, language string) *OpenAIWhisper {
	const requestTimeout = 60 * time.Second

	w := new(OpenAIWhisper)
	w.apiKey = apiKey
	w.model = model
	w.language = language
	w.client = &http.Client{Timeout: requestTimeout}
	return w
}
// whisperResponse is the shape the transcription response body is decoded
// into; only the "text" field is read.
type whisperResponse struct {
	Text string `json:"text"`
}
// Transcribe uploads audioData to the transcription endpoint as a
// multipart/form-data request and returns the recognised text.
//
// mimeType describes the encoding of audioData. It is used to choose the
// extension of the uploaded file name; an empty or unrecognised value falls
// back to "audio.wav". The request is created with ctx, so cancelling ctx
// aborts the call.
//
// An error is returned when audioData is empty, when the multipart body
// cannot be assembled, when the HTTP call fails, when the endpoint replies
// with a status other than 200 (the response body is included in the error
// text), or when the response body is not valid JSON.
func (w *OpenAIWhisper) Transcribe(ctx context.Context, audioData []byte, mimeType string) (string, error) {
	// Reject empty input locally instead of spending a network round trip on
	// a request that carries no audio.
	if len(audioData) == 0 {
		return "", fmt.Errorf("transcribe: empty audio data")
	}

	var buf bytes.Buffer
	mw := multipart.NewWriter(&buf)

	fw, err := mw.CreateFormFile("file", whisperUploadName(mimeType))
	if err != nil {
		return "", fmt.Errorf("create form file: %w", err)
	}
	if _, err := fw.Write(audioData); err != nil {
		return "", fmt.Errorf("write audio data: %w", err)
	}
	if err := mw.WriteField("model", w.model); err != nil {
		return "", fmt.Errorf("write model field: %w", err)
	}
	if err := mw.WriteField("response_format", "json"); err != nil {
		return "", fmt.Errorf("write response_format: %w", err)
	}
	if w.language != "" {
		if err := mw.WriteField("language", w.language); err != nil {
			return "", fmt.Errorf("write language field: %w", err)
		}
	}
	// Close writes the terminating boundary; a failure here means the body
	// is incomplete and must not be sent.
	if err := mw.Close(); err != nil {
		return "", fmt.Errorf("close multipart writer: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, whisperAPIURL, &buf)
	if err != nil {
		return "", fmt.Errorf("create whisper request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+w.apiKey)
	req.Header.Set("Content-Type", mw.FormDataContentType())

	resp, err := w.client.Do(req)
	if err != nil {
		return "", fmt.Errorf("whisper API call: %w", err)
	}
	defer resp.Body.Close()

	// The body is read before the status check so that it can be quoted in
	// the error for non-200 replies.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read whisper response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("whisper API error %d: %s", resp.StatusCode, string(body))
	}

	var result whisperResponse
	if err := json.Unmarshal(body, &result); err != nil {
		return "", fmt.Errorf("parse whisper response: %w", err)
	}
	return result.Text, nil
}

// whisperUploadName maps an audio MIME type to the file name used for the
// multipart "file" part. Parameters after ';' (for example "; codecs=opus")
// and letter case are ignored. Unknown or empty types yield "audio.wav".
//
// NOTE(review): the extension is chosen on the assumption that the endpoint
// uses the file name to recognise the audio container — confirm against the
// OpenAI API reference.
func whisperUploadName(mimeType string) string {
	if i := strings.IndexByte(mimeType, ';'); i >= 0 {
		mimeType = mimeType[:i]
	}
	switch strings.ToLower(strings.TrimSpace(mimeType)) {
	case "audio/ogg", "application/ogg":
		return "audio.ogg"
	case "audio/mpeg", "audio/mp3":
		return "audio.mp3"
	case "audio/mp4", "audio/m4a", "audio/x-m4a":
		return "audio.m4a"
	case "video/mp4":
		return "audio.mp4"
	case "audio/webm", "video/webm":
		return "audio.webm"
	case "audio/flac", "audio/x-flac":
		return "audio.flac"
	default:
		return "audio.wav"
	}
}