Improve WhisperX implementation

b9bc36bb · Ahmet Öner · 4298f3f5 · b9bc36bb · b9bc36bb · b9bc36bb
Commit b9bc36bb authored 5 months ago by Ahmet Öner
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,18 +6,19 @@ Current release (v1.7.1) supports following whisper models:
 - [openai/whisper](https://github.com/openai/whisper)@[v20240930](https://github.com/openai/whisper/releases/tag/v20240930)
 - [SYSTRAN/faster-whisper](https://github.com/SYSTRAN/faster-whisper)@[v1.1.0](https://github.com/SYSTRAN/faster-whisper/releases/tag/v1.1.0)
+- [whisperX](https://github.com/m-bain/whisperX)@[v3.3.1](https://github.com/m-bain/whisperX/releases/tag/v3.3.1)
 ## Quick Usage
 === ":octicons-file-code-16: `CPU`"
-    ```sh
+    ```shell
    docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
    ```
 === ":octicons-file-code-16: `GPU`"
-    ```sh
+    ```shell
    docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu
    ```

--- a/docs/run.md
+++ b/docs/run.md
@@ -6,9 +6,12 @@ Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice>
 === ":octicons-file-code-16: `CPU`"
-    ```sh
+    ```shell
    docker pull onerahmet/openai-whisper-asr-webservice:latest
-    docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 \
+      -e ASR_MODEL=base \
+      -e ASR_ENGINE=openai_whisper \
+      onerahmet/openai-whisper-asr-webservice:latest
    ```
 === ":octicons-file-code-16: `CPU (macOS)`"
@@ -17,38 +20,57 @@ Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice>
    > 
    > The `:latest` image tag provides both amd64 and arm64 architectures:
-    ```sh
+    ```shell
    docker pull onerahmet/openai-whisper-asr-webservice:latest
-    docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 \
+      -e ASR_MODEL=base \
+      -e ASR_ENGINE=openai_whisper \
+      onerahmet/openai-whisper-asr-webservice:latest
    ```
 === ":octicons-file-code-16: `GPU`"
-    ```sh
+    ```shell
    docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu
-    docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu
+    docker run -d --gpus all -p 9000:9000 \
+      -e ASR_MODEL=base \
+      -e ASR_ENGINE=openai_whisper \
+      onerahmet/openai-whisper-asr-webservice:latest-gpu
    ```
+### Environment Variables
+The following environment variables can be used to configure the service:
+- `ASR_MODEL`: Whisper model to use (tiny, base, small, medium, large) [default: base]
+- `ASR_ENGINE`: ASR engine to use (openai_whisper, faster_whisper, whisperx) [default: openai_whisper]
+- `ASR_MODEL_PATH`: Custom path to store/load model files [optional]
 > Interactive Swagger API documentation is available at <http://localhost:9000/docs>
 ![Swagger UI](assets/images/swagger-ui.png)
 ## Cache
-The ASR model is downloaded each time you start the container, using the large model this can take some time.
+The ASR model is downloaded each time you start the container. Using the large model can take significant time to download.
-If you want to decrease the time it takes to start your container by skipping the download, you can store the cache directory (`~/.cache/whisper` or `/root/.cache/whisper`) to a persistent storage.
+To reduce container startup time by avoiding repeated downloads, you can persist the cache directory (`/root/.cache`) to local storage.
-Next time you start your container the ASR Model will be taken from the cache instead of being downloaded again.
+The model will then be loaded from the cache instead of being downloaded again on subsequent container starts.
-**Important this will prevent you from receiving any updates to the models.**
+**Important: Using a persistent cache will prevent you from receiving model updates.**
 === ":octicons-file-code-16: `Default cache dir`"
-    ```sh
+    ```shell
-    docker run -d -p 9000:9000 -v $PWD/yourlocaldir:/root/.cache/whisper onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 \
+      -v $PWD/cache:/root/.cache \
+      onerahmet/openai-whisper-asr-webservice:latest
    ```
 === ":octicons-file-code-16: `With ASR_MODEL_PATH`"
-    ```sh
+    ```shell
-    docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v $PWD/yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 \
+      -e ASR_MODEL_PATH=/data/whisper \
+      -v $PWD/cache:/data/whisper \
+      onerahmet/openai-whisper-asr-webservice:latest
    ```
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -5,7 +5,7 @@ site_dir: public
 site_description: "OpenAI Whisper ASR Webservice API"
 repo_url: "https://github.com/ahmetoner/whisper-asr-webservice"
 repo_name: "ahmetoner/whisper-asr-webservice"
-copyright: Copyright &copy; 2023
+copyright: Copyright &copy; 2025
 edit_uri: edit/main/docs/
 validation:
@@ -14,14 +14,13 @@ validation:
  unrecognized_links: warn
 nav:
-  - Home:
+  - Overview: index.md
-      - Whisper ASR Webservice: index.md
+  - Installation & Usage: run.md
-      - Run: run.md
+  - API Endpoints: endpoints.md
-      - Endpoints: endpoints.md
+  - Configuration: environmental-variables.md
-      - Environmental Variables: environmental-variables.md
+  - Development: build.md
-      - Build: build.md
  - Changelog: changelog.md
-      - Licence: licence.md
+  - License: licence.md
  - Releases: https://github.com/ahmetoner/whisper-asr-webservice/releases
  - Docker Hub: https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice
@@ -39,13 +38,38 @@ theme:
    - content.tooltips
    - navigation.footer
    - navigation.indexes
-    - navigation.sections
+    # - navigation.sections # important
-    - navigation.tabs
-    - navigation.tabs.sticky
    - navigation.top
+    # - navigation.tabs
+    # - navigation.tabs.sticky
    - search.highlight
    - search.suggest
    - toc.follow
+    - toc.integrate
+  palette:
+    # System preference
+    - media: "(prefers-color-scheme)"
+      toggle:
+        icon: material/brightness-auto
+        name: Switch to light mode
+    # Light mode
+    - media: "(prefers-color-scheme: light)"
+      scheme: default
+      primary: custom
+      accent: teal
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Dark mode
+    - media: "(prefers-color-scheme: dark)"
+      scheme: slate
+      primary: black
+      accent: lime
+      toggle:
+        icon: material/brightness-4
+        name: Switch to system preference
 extra_css:
  - assets/css/extra.css
@@ -80,6 +104,7 @@ plugins:
  - search
 extra:
+  generator: false
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/ahmetoner

--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,28 +11,24 @@ packages = [{ include = "app" }]
 [tool.poetry.scripts]
 whisper-asr-webservice = "app.webservice:start"
-# Package Sources
 [[tool.poetry.source]]
 name = "pytorch"
 url = "https://download.pytorch.org/whl/cpu"
 priority = "explicit"
-# Core Dependencies
 [tool.poetry.dependencies]
-python = "^3.10"
+python = "<3.13,>=3.10"
 fastapi = "^0.115.8"
 uvicorn = { extras = ["standard"], version = "^0.34.0" }
 python-multipart = "^0.0.20"
 ffmpeg-python = "^0.2.0"
+numpy = "<2.0.0"
-# ML Dependencies
 openai-whisper = "^20240930"
-faster-whisper = "^1.1.1"
+faster-whisper = "^1.1.0"
+whisperx = "^3.3.1"
 tqdm = "^4.67.1"
 llvmlite = "^0.44.0"
 numba = "^0.61.0"
-# PyTorch Dependencies (Platform Specific)
 torch = [
  { markers = "sys_platform == 'darwin' and platform_machine == 'arm64'", url = "https://download.pytorch.org/whl/cpu/torch-2.6.0-cp310-none-macosx_11_0_arm64.whl" },
  { markers = "sys_platform == 'linux' and platform_machine == 'arm64'", url = "https://download.pytorch.org/whl/cpu/torch-2.6.0-cp310-none-macosx_11_0_arm64.whl" },
@@ -42,27 +38,21 @@ torch = [
  { markers = "sys_platform == 'win32' and platform_machine == 'AMD64'", url = "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-win_amd64.whl" },
 ]
-# Development Dependencies
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.4"
 ruff = "^0.9.6"
 black = "^25.1.0"
-mkdocs = "^1.6.1"
 mkdocs-material = "^9.6.4"
 pymdown-extensions = "^10.14.3"
-# Build System Configuration
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
-# Code Formatting Configuration
 [tool.black]
 skip-string-normalization = true
 line-length = 120
-# Linting Configuration
 [tool.ruff]
 line-length = 120