feat: initial commit
This commit is contained in:
80
internal/application/usecase/handle_voice_message.go
Normal file
80
internal/application/usecase/handle_voice_message.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package usecase
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/paramah/gw_telegram/internal/application/dto"
|
||||
"github.com/paramah/gw_telegram/internal/domain/port"
|
||||
)
|
||||
|
||||
type HandleVoiceMessage struct {
|
||||
downloader port.FileDownloader
|
||||
converter AudioConverter
|
||||
transcriber port.SpeechTranscriber
|
||||
textHandler *HandleTextMessage
|
||||
gateway port.MessageGateway
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// AudioConverter transcodes an in-memory audio payload from one format to
// another (for example OGG to WAV).
//
// Convert takes the encoded input bytes together with the MIME type they are
// currently in (fromMime) and the MIME type requested (toMime). It returns the
// re-encoded bytes, or an error when the conversion could not be performed.
type AudioConverter interface {
	Convert(ctx context.Context, input []byte, fromMime string, toMime string) ([]byte, error)
}
|
||||
|
||||
func NewHandleVoiceMessage(
|
||||
downloader port.FileDownloader,
|
||||
converter AudioConverter,
|
||||
transcriber port.SpeechTranscriber,
|
||||
textHandler *HandleTextMessage,
|
||||
gateway port.MessageGateway,
|
||||
logger *slog.Logger,
|
||||
) *HandleVoiceMessage {
|
||||
return &HandleVoiceMessage{
|
||||
downloader: downloader,
|
||||
converter: converter,
|
||||
transcriber: transcriber,
|
||||
textHandler: textHandler,
|
||||
gateway: gateway,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *HandleVoiceMessage) Execute(ctx context.Context, in dto.IncomingMessageDTO) error {
|
||||
_ = h.gateway.SendTyping(ctx, in.ChatID)
|
||||
|
||||
audioBytes, mimeType, err := h.downloader.Download(ctx, in.VoiceFileID)
|
||||
if err != nil {
|
||||
h.logger.ErrorContext(ctx, "voice download failed", "error", err, "file_id", in.VoiceFileID)
|
||||
_ = h.gateway.SendText(ctx, in.ChatID, "Sorry, I couldn't download the audio message. Please try again.")
|
||||
return fmt.Errorf("handle voice: download: %w", err)
|
||||
}
|
||||
|
||||
// Convert OGG Opus (Telegram default) to WAV for Whisper
|
||||
if mimeType == "audio/ogg" || mimeType == "" {
|
||||
wavBytes, err := h.converter.Convert(ctx, audioBytes, "audio/ogg", "audio/wav")
|
||||
if err != nil {
|
||||
h.logger.WarnContext(ctx, "audio conversion failed, trying raw", "error", err)
|
||||
// fall through with original bytes
|
||||
} else {
|
||||
audioBytes = wavBytes
|
||||
mimeType = "audio/wav"
|
||||
}
|
||||
}
|
||||
|
||||
transcript, err := h.transcriber.Transcribe(ctx, audioBytes, mimeType)
|
||||
if err != nil {
|
||||
h.logger.ErrorContext(ctx, "transcription failed", "error", err)
|
||||
_ = h.gateway.SendText(ctx, in.ChatID, "Sorry, I couldn't understand the voice message. Please try sending text instead.")
|
||||
return fmt.Errorf("handle voice: transcribe: %w", err)
|
||||
}
|
||||
|
||||
h.logger.InfoContext(ctx, "voice transcribed", "user_id", in.UserID, "length", len(transcript))
|
||||
|
||||
textIn := in
|
||||
textIn.Text = transcript
|
||||
textIn.IsVoice = false
|
||||
|
||||
return h.textHandler.Execute(ctx, textIn)
|
||||
}
|
||||
Reference in New Issue
Block a user