Enable word-level timestamps for OpenAI Whisper

55e4b488 · Ahmet Öner · 986a18af · 55e4b488 · 55e4b488 · 55e4b488
Commit 55e4b488 authored Oct 1, 2023 by Ahmet Öner
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ Unreleased
 ### Changed
 - Upgraded
-  - OpenAI Whisper to v20230918
+  - [openai/whisper](https://github.com/openai/whisper) to [v20230918](https://github.com/openai/whisper/releases/tag/v20230918)
 [1.1.1] (2023-05-29)
 --------------------


--- a/app/openai_whisper/core.py
+++ b/app/openai_whisper/core.py
@@ -28,6 +28,8 @@ def transcribe(
        options_dict["language"] = language
    if initial_prompt:
        options_dict["initial_prompt"] = initial_prompt
+    if word_timestamps:
+        options_dict["word_timestamps"] = word_timestamps
    with model_lock:
        result = model.transcribe(audio, **options_dict)


--- a/app/webservice.py
+++ b/app/webservice.py
@@ -66,11 +66,7 @@ async def asr(
        audio_file: UploadFile = File(...),
        encode: bool = Query(default=True, description="Encode audio first through ffmpeg"),
        output: Union[str, None] = Query(default="txt", enum=["txt", "vtt", "srt", "tsv", "json"]),
-        word_timestamps: bool = Query(
+        word_timestamps: bool = Query(default=False, description="World level timestamps")
-            default=False,
-            description="World level timestamps",
-            include_in_schema=(True if ASR_ENGINE == "faster_whisper" else False)
-        )
 ):
    result = transcribe(load_audio(audio_file.file, encode), task, language, initial_prompt, word_timestamps, output)
    return StreamingResponse(