diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c8c9c4f727866d006c3374d629e7881e94d7d84..d2b4ec2a104783e47dd095de038422128e20c5a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Changelog Unreleased ---------- +### Added + + - Added detection confidence to language detection endpoint + [1.6.0] (2024-10-06) -------------------- diff --git a/app/faster_whisper/core.py b/app/faster_whisper/core.py index 1d48f2d6106907cc1dcbf5ceada213498aab13d1..c99e04927baa86b1bc50670d706f3cb54b1a898d 100644 --- a/app/faster_whisper/core.py +++ b/app/faster_whisper/core.py @@ -70,8 +70,9 @@ def language_detection(audio): with model_lock: segments, info = model.transcribe(audio, beam_size=5) detected_lang_code = info.language + detected_language_confidence = info.language_probability - return detected_lang_code + return detected_lang_code, detected_language_confidence def write_result(result: dict, file: BinaryIO, output: Union[str, None]): diff --git a/app/openai_whisper/core.py b/app/openai_whisper/core.py index 88bde4f2b20f8a91887d5dbdc7ca049cc58d39d9..6ccee136ef68378645187f0b6bc1ef39a60a1830 100644 --- a/app/openai_whisper/core.py +++ b/app/openai_whisper/core.py @@ -55,7 +55,7 @@ def language_detection(audio): _, probs = model.detect_language(mel) detected_lang_code = max(probs, key=probs.get) - return detected_lang_code + return detected_lang_code, probs[detected_lang_code] def write_result(result: dict, file: BinaryIO, output: Union[str, None]): diff --git a/app/webservice.py b/app/webservice.py index 5391afed997deb7bdf1cb1f918515ea300319a5c..84efa8c0def8bc9396c0e57e869183867938dde1 100644 --- a/app/webservice.py +++ b/app/webservice.py @@ -89,8 +89,8 @@ async def detect_language( audio_file: UploadFile = File(...), # noqa: B008 encode: bool = Query(default=True, description="Encode audio first through FFmpeg"), ): - detected_lang_code = language_detection(load_audio(audio_file.file, encode)) - return {"detected_language": tokenizer.LANGUAGES[detected_lang_code], 
"language_code": detected_lang_code} + detected_lang_code, confidence = language_detection(load_audio(audio_file.file, encode)) + return {"detected_language": tokenizer.LANGUAGES[detected_lang_code], "language_code": detected_lang_code, "confidence": confidence} def load_audio(file: BinaryIO, encode=True, sr: int = SAMPLE_RATE):