From 6fd7eeb0b95d6d38cd1908f87635fdf4c154be79 Mon Sep 17 00:00:00 2001 From: Aidan Crowther <aidan.crowther@outlook.com> Date: Thu, 12 Dec 2024 12:05:09 -0500 Subject: [PATCH] Configure openai_whisper model to correctly load n_dims with larger models --- CHANGELOG.md | 4 ++++ app/openai_whisper/core.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c8c9c4..c9d3481 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Changelog Unreleased ---------- +### Added + + - Set mel generation to adjust n_dims automatically to match the loaded model + [1.6.0] (2024-10-06) -------------------- diff --git a/app/openai_whisper/core.py b/app/openai_whisper/core.py index 88bde4f..e93f5ff 100644 --- a/app/openai_whisper/core.py +++ b/app/openai_whisper/core.py @@ -48,7 +48,7 @@ def language_detection(audio): audio = whisper.pad_or_trim(audio) # make log-Mel spectrogram and move to the same device as the model - mel = whisper.log_mel_spectrogram(audio).to(model.device) + mel = whisper.log_mel_spectrogram(audio, model.dims.n_mels).to(model.device) # detect the spoken language with model_lock: -- GitLab