134 lines
4.0 KiB
Go
134 lines
4.0 KiB
Go
package usecase
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/paramah/gw_telegram/internal/application/dto"
|
|
"github.com/paramah/gw_telegram/internal/domain/entity"
|
|
"github.com/paramah/gw_telegram/internal/domain/port"
|
|
)
|
|
|
|
type HandleVoiceMessage struct {
|
|
downloader port.FileDownloader
|
|
converter AudioConverter
|
|
transcriber port.SpeechTranscriber
|
|
fileStore port.VoiceFileStore
|
|
textHandler *HandleTextMessage
|
|
gateway port.MessageGateway
|
|
logger *slog.Logger
|
|
}
|
|
|
|
// AudioConverter transcodes an in-memory audio payload from one format to
// another (for example OGG → WAV).
//
// Convert takes the encoded input bytes together with the source and target
// MIME types and returns the converted bytes, or a non-nil error.
type AudioConverter interface {
	Convert(
		ctx context.Context,
		input []byte,
		fromMime, toMime string,
	) ([]byte, error)
}
|
|
|
|
func NewHandleVoiceMessage(
|
|
downloader port.FileDownloader,
|
|
converter AudioConverter,
|
|
transcriber port.SpeechTranscriber,
|
|
fileStore port.VoiceFileStore,
|
|
textHandler *HandleTextMessage,
|
|
gateway port.MessageGateway,
|
|
logger *slog.Logger,
|
|
) *HandleVoiceMessage {
|
|
return &HandleVoiceMessage{
|
|
downloader: downloader,
|
|
converter: converter,
|
|
transcriber: transcriber,
|
|
fileStore: fileStore,
|
|
textHandler: textHandler,
|
|
gateway: gateway,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
func (h *HandleVoiceMessage) Execute(ctx context.Context, in dto.IncomingMessageDTO) error {
|
|
_ = h.gateway.SendTyping(ctx, in.ChatID)
|
|
|
|
sample := entity.VoiceSample{
|
|
ID: uuid.New().String(),
|
|
UserID: in.UserID,
|
|
ChatID: in.ChatID,
|
|
FileIDTg: in.VoiceFileID,
|
|
CreatedAt: time.Now(),
|
|
}
|
|
|
|
// Step 1: download OGG from Telegram and persist to disk
|
|
audioBytes, mimeType, err := h.downloader.Download(ctx, in.VoiceFileID)
|
|
if err != nil {
|
|
h.logger.ErrorContext(ctx, "voice download failed", "error", err, "file_id", in.VoiceFileID)
|
|
_ = h.gateway.SendText(ctx, in.ChatID, "Sorry, I couldn't download the audio message. Please try again.")
|
|
return fmt.Errorf("handle voice: download: %w", err)
|
|
}
|
|
|
|
sample.OGGPath, err = h.fileStore.SaveRaw(ctx, sample.ID, audioBytes)
|
|
if err != nil {
|
|
h.logger.WarnContext(ctx, "failed to persist raw audio", "error", err, "sample_id", sample.ID)
|
|
}
|
|
|
|
// Step 2: convert OGG → WAV and persist to disk
|
|
if mimeType == "audio/ogg" || mimeType == "" {
|
|
wavBytes, convErr := h.converter.Convert(ctx, audioBytes, "audio/ogg", "audio/wav")
|
|
if convErr != nil {
|
|
h.logger.WarnContext(ctx, "audio conversion failed, proceeding with raw audio", "error", convErr)
|
|
} else {
|
|
audioBytes = wavBytes
|
|
mimeType = "audio/wav"
|
|
|
|
sample.WAVPath, err = h.fileStore.SaveConverted(ctx, sample.ID, wavBytes)
|
|
if err != nil {
|
|
h.logger.WarnContext(ctx, "failed to persist converted audio", "error", err, "sample_id", sample.ID)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 3: transcribe — receive both text and VTT
|
|
result, err := h.transcriber.Transcribe(ctx, audioBytes, mimeType)
|
|
if err != nil {
|
|
h.logger.ErrorContext(ctx, "transcription failed", "error", err, "sample_id", sample.ID)
|
|
_ = h.gateway.SendText(ctx, in.ChatID, "Sorry, I couldn't understand the voice message. Please try sending text instead.")
|
|
_ = h.fileStore.Cleanup(ctx, sample.ID)
|
|
return fmt.Errorf("handle voice: transcribe: %w", err)
|
|
}
|
|
|
|
sample.Transcript = result.Text
|
|
|
|
// Step 4: persist VTT file
|
|
if result.VTT != "" {
|
|
sample.VTTPath, err = h.fileStore.SaveVTT(ctx, sample.ID, result.VTT)
|
|
if err != nil {
|
|
h.logger.WarnContext(ctx, "failed to persist vtt", "error", err, "sample_id", sample.ID)
|
|
}
|
|
}
|
|
|
|
h.logger.InfoContext(ctx, "voice sample analysed",
|
|
"user_id", in.UserID,
|
|
"sample_id", sample.ID,
|
|
"ogg_path", sample.OGGPath,
|
|
"wav_path", sample.WAVPath,
|
|
"vtt_path", sample.VTTPath,
|
|
"transcript_len", len(result.Text),
|
|
)
|
|
|
|
// Step 5: route transcript as a regular text message
|
|
textIn := in
|
|
textIn.Text = result.Text
|
|
textIn.IsVoice = false
|
|
|
|
if err := h.textHandler.Execute(ctx, textIn); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Step 6: cleanup only after successful analysis and dispatch
|
|
if cleanErr := h.fileStore.Cleanup(ctx, sample.ID); cleanErr != nil {
|
|
h.logger.WarnContext(ctx, "failed to cleanup voice files", "error", cleanErr, "sample_id", sample.ID)
|
|
}
|
|
|
|
return nil
|
|
}
|