From 6fd7eeb0b95d6d38cd1908f87635fdf4c154be79 Mon Sep 17 00:00:00 2001
From: Aidan Crowther <aidan.crowther@outlook.com>
Date: Thu, 12 Dec 2024 12:05:09 -0500
Subject: [PATCH] Configure openai_whisper model to correctly load n_mels with
larger models
---
CHANGELOG.md | 4 ++++
app/openai_whisper/core.py | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9c8c9c4..c9d3481 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ Changelog
Unreleased
----------
+### Added
+
+ - Set log-Mel spectrogram generation to use the loaded model's n_mels automatically
+
[1.6.0] (2024-10-06)
--------------------
diff --git a/app/openai_whisper/core.py b/app/openai_whisper/core.py
index 88bde4f..e93f5ff 100644
--- a/app/openai_whisper/core.py
+++ b/app/openai_whisper/core.py
@@ -48,7 +48,7 @@ def language_detection(audio):
audio = whisper.pad_or_trim(audio)
# make log-Mel spectrogram and move to the same device as the model
- mel = whisper.log_mel_spectrogram(audio).to(model.device)
+ mel = whisper.log_mel_spectrogram(audio, model.dims.n_mels).to(model.device)
# detect the spoken language
with model_lock:
--
GitLab