From 7bf22dd672853523cb160c4b650f2ea8996d7d30 Mon Sep 17 00:00:00 2001
From: Ahmet Oner <me@ahmetoner.com>
Date: Mon, 2 Oct 2023 01:20:19 +0200
Subject: [PATCH] Add MkDocs

---
 .github/workflows/documentation.yml        |  26 +++
 .gitignore                                 |   4 +-
 README.md                                  | 184 +--------------------
 docs/.overrides/main.html                  |  12 ++
 docs/assets/css/extra.css                  |   5 +
 docs/assets/{img => images}/swagger-ui.png | Bin
 docs/build.md                              |  80 +++++++++
 docs/changelog.md                          |   1 +
 docs/endpoints.md                          |  37 +++++
 docs/environmental-variables.md            |  27 +++
 docs/index.md                              |   7 +
 docs/licence.md                            |   5 +
 docs/run.md                                |  53 ++++++
 mkdocs.yml                                 |  87 ++++++++++
 14 files changed, 345 insertions(+), 183 deletions(-)
 create mode 100644 .github/workflows/documentation.yml
 create mode 100644 docs/.overrides/main.html
 create mode 100644 docs/assets/css/extra.css
 rename docs/assets/{img => images}/swagger-ui.png (100%)
 create mode 100644 docs/build.md
 create mode 100644 docs/changelog.md
 create mode 100644 docs/endpoints.md
 create mode 100644 docs/environmental-variables.md
 create mode 100644 docs/index.md
 create mode 100644 docs/licence.md
 create mode 100644 docs/run.md
 create mode 100644 mkdocs.yml

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 0000000..fee42de
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,26 @@
+name: CI
+on:
+  push:
+    branches:
+      - main
+      - docs
+permissions:
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: github.event.repository.fork == false
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.x
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v3
+        with:
+          key: mkdocs-material-${{ env.cache_id }}
+          path: .cache
+          restore-keys: |
+            mkdocs-material-
+      - run: pip install mkdocs-material pymdown-extensions
+      - run: mkdocs gh-deploy --force
diff --git a/.gitignore b/.gitignore
index 2e77e32..4dbf939 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,4 +39,6 @@ MANIFEST.in
 pip-wheel-metadata
 /poetry.toml
 
-poetry/core/*
\ No newline at end of file
+poetry/core/*
+
+public
\ No newline at end of file
diff --git a/README.md b/README.md
index 2198f63..3b171b7 100644
--- a/README.md
+++ b/README.md
@@ -12,185 +12,5 @@ Current release (v1.2.0) supports following whisper models:
 - [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918)
 - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0)
 
-## Usage
-
-Whisper ASR Webservice now available on Docker Hub. You can find the latest version of this repository on docker hub for CPU and GPU.
-
-Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice>
-
-For CPU:
-
-```sh
-docker pull onerahmet/openai-whisper-asr-webservice:latest
-docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
-```
-
-For GPU:
-
-```sh
-docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu
-docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu
-```
-
-For macOS (CPU only):
-
-GPU passthrough does not work on macOS due to fundamental design limitations of Docker. Docker actually runs containers within a LinuxVM on macOS. If you wish to run GPU-accelerated containers, I'm afraid Linux is your only option.
-
-The `:latest` image tag provides both amd64 and arm64 architectures:
-
-```sh
-docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
-```
-
-```sh
-# Interactive Swagger API documentation is available at http://localhost:9000/docs
-```
-![Swagger UI](https://github.com/ahmetoner/whisper-asr-webservice/blob/main/docs/assets/img/swagger-ui.png?raw=true)
-
-Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large`, `large-v1` and `large-v2`. Please note that `large` and `large-v2` are the same model.
-
-For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models.
-
-## Run (Development Environment)
-
-Install poetry with following command:
-
-```sh
-pip3 install poetry
-```
-
-Install torch with following command:
-
-```sh
-# just for GPU:
-pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
-```
-
-Install packages:
-
-```sh
-poetry install
-```
-
-Starting the Webservice:
-
-```sh
-poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
-```
-
-With docker compose:
-
-For CPU:
-```sh
-docker-compose up --build
-```
-
-For GPU:
-```sh
-docker-compose up --build -f docker-compose.gpu.yml
-```
-
-## Quick start
-
-After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
-
-There are 2 endpoints available:
-
-- /asr (TXT, VTT, SRT, TSV, JSON)
-- /detect-language
-
-## Automatic Speech recognition service /asr
-
-If you choose the **transcribe** task, transcribes the uploaded file. Both audio and video files are supported (as long as ffmpeg supports it).
-
-Note that you can also upload video formats directly as long as they are supported by ffmpeg.
-
-You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint.
-
-You can provide the language or it will be automatically recognized.
-
-If you choose the **translate** task it will provide an English transcript no matter which language was spoken.
-
-You can enable word level timestamps output by `word_timestamps` parameter (only with `Faster Whisper` for now).
-
-Returns a json with following fields:
-
-- **text**: Contains the full transcript
-- **segments**: Contains an entry per segment. Each entry provides `timestamps`, `transcript`, `token ids`, `word level timestamps` and other metadata
-- **language**: Detected or provided language (as a language code)
-
-## Language detection service /detect-language
-
-Detects the language spoken in the uploaded file. For longer files it only processes first 30 seconds.
-
-Returns a json with following fields:
-
-- **detected_language**
-- **language_code**
-
-## Build
-
-Build .whl package
-
-```sh
-poetry build
-```
-
-Configuring the ASR Engine
-
-```sh
-export ASR_ENGINE=openai_whisper
-```
-or
-```sh
-export ASR_ENGINE=faster_whisper
-```
-
-Configuring the Model
-
-```sh
-export ASR_MODEL=base
-```
-
-## Docker Build
-
-### For CPU
-
-```sh
-# Build Image
-docker build -t whisper-asr-webservice .
-
-# Run Container
-docker run -d -p 9000:9000 whisper-asr-webservice
-# or
-docker run -d -p 9001:9000 -e ASR_MODEL=base whisper-asr-webservice3
-```
-
-### For GPU
-
-```sh
-# Build Image
-docker build -f Dockerfile.gpu -t whisper-asr-webservice-gpu .
-
-# Run Container
-docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-gpu
-# or
-docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-gpu
-```
-
-## Cache
-The ASR model is downloaded each time you start the container, using the large model this can take some time. 
-If you want to decrease the time it takes to start your container by skipping the download, you can store the cache directory (`~/.cache/whisper`) to a persistent storage. 
-Next time you start your container the ASR Model will be taken from the cache instead of being downloaded again.
-
-**Important this will prevent you from receiving any updates to the models.**
- 
-```sh
-docker run -d -p 9000:9000 -v ./yourlocaldir:~/.cache/whisper onerahmet/openai-whisper-asr-webservice:latest
-```
-
-or
-
-```sh
-docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v ./yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest
-```
+# Documentation
+Explore the documentation by clicking [here](https://ahmetoner.github.io/whisper-asr-webservice).
diff --git a/docs/.overrides/main.html b/docs/.overrides/main.html
new file mode 100644
index 0000000..fec6239
--- /dev/null
+++ b/docs/.overrides/main.html
@@ -0,0 +1,12 @@
+{% extends "base.html" %}
+
+{% block announce %}
+
+For updates follow <strong>@ahmetoner</strong> on
+<a rel="me" href="https://github.com/ahmetoner">
+    <span class="twemoji github">
+      {% include ".icons/fontawesome/brands/github.svg" %}
+    </span>
+    <strong>GitHub</strong>
+</a>
+{% endblock %}
\ No newline at end of file
diff --git a/docs/assets/css/extra.css b/docs/assets/css/extra.css
new file mode 100644
index 0000000..8b14872
--- /dev/null
+++ b/docs/assets/css/extra.css
@@ -0,0 +1,5 @@
+:root {
+    --md-primary-fg-color:        #3d6178;
+    --md-primary-fg-color--light: #3d6178;
+    --md-primary-fg-color--dark:  #3d6178;
+}
diff --git a/docs/assets/img/swagger-ui.png b/docs/assets/images/swagger-ui.png
similarity index 100%
rename from docs/assets/img/swagger-ui.png
rename to docs/assets/images/swagger-ui.png
diff --git a/docs/build.md b/docs/build.md
new file mode 100644
index 0000000..01f97a5
--- /dev/null
+++ b/docs/build.md
@@ -0,0 +1,80 @@
+## Development Environment
+
+Install poetry with following command:
+
+```sh
+pip3 install poetry
+```
+
+Install torch with following command:
+
+```sh
+# just for GPU:
+pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch
+```
+
+### Run
+
+Install packages:
+
+```sh
+poetry install
+```
+
+Starting the Webservice:
+
+```sh
+poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker
+```
+
+### Build
+
+=== ":octicons-file-code-16: `Poetry`"
+
+    Build .whl package
+    
+    ```sh
+    poetry build
+    ```
+=== ":octicons-file-code-16: `Docker`"
+
+    With `Dockerfile`:
+
+    === ":octicons-file-code-16: `CPU`"
+    
+        ```sh
+        # Build Image
+        docker build -t whisper-asr-webservice .
+        
+        # Run Container
+        docker run -d -p 9000:9000 whisper-asr-webservice
+        # or
+        docker run -d -p 9001:9000 -e ASR_MODEL=base whisper-asr-webservice
+        ```
+    
+    === ":octicons-file-code-16: `GPU`"
+    
+        ```sh
+        # Build Image
+        docker build -f Dockerfile.gpu -t whisper-asr-webservice-gpu .
+        
+        # Run Container
+        docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-gpu
+        # or
+        docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-gpu
+        ```
+
+    With `docker-compose`:
+    
+    === ":octicons-file-code-16: `CPU`"
+    
+        ```sh
+        docker-compose up --build
+        ```
+    
+    === ":octicons-file-code-16: `GPU`"
+    
+        ```sh
+        docker-compose -f docker-compose.gpu.yml up --build
+        ```
+
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 0000000..786b75d
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1 @@
+--8<-- "CHANGELOG.md"
diff --git a/docs/endpoints.md b/docs/endpoints.md
new file mode 100644
index 0000000..ccd22b0
--- /dev/null
+++ b/docs/endpoints.md
@@ -0,0 +1,37 @@
+## Quick start
+
+After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs)
+
+There are 2 endpoints available:
+
+- /asr (TXT, VTT, SRT, TSV, JSON)
+- /detect-language
+
+## Automatic Speech recognition service /asr
+
+If you choose the **transcribe** task, it transcribes the uploaded file. Both audio and video files are supported (as long as ffmpeg supports it).
+
+Note that you can also upload video formats directly as long as they are supported by ffmpeg.
+
+You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint.
+
+You can provide the language or it will be automatically recognized.
+
+If you choose the **translate** task it will provide an English transcript no matter which language was spoken.
+
+You can enable word level timestamps output by `word_timestamps` parameter (only with `Faster Whisper` for now).
+
+Returns a json with following fields:
+
+- **text**: Contains the full transcript
+- **segments**: Contains an entry per segment. Each entry provides `timestamps`, `transcript`, `token ids`, `word level timestamps` and other metadata
+- **language**: Detected or provided language (as a language code)
+
+## Language detection service /detect-language
+
+Detects the language spoken in the uploaded file. For longer files it only processes first 30 seconds.
+
+Returns a json with following fields:
+
+- **detected_language**
+- **language_code**
\ No newline at end of file
diff --git a/docs/environmental-variables.md b/docs/environmental-variables.md
new file mode 100644
index 0000000..cc631b0
--- /dev/null
+++ b/docs/environmental-variables.md
@@ -0,0 +1,27 @@
+### Configuring the `Engine`
+
+=== ":octicons-file-code-16: `openai_whisper`"
+    ```sh
+    export ASR_ENGINE=openai_whisper
+    ```
+=== ":octicons-file-code-16: `faster_whisper`"
+    ```sh
+    export ASR_ENGINE=faster_whisper
+    ```
+
+### Configuring the `Model`
+
+```sh
+export ASR_MODEL=base
+```
+
+Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large` (only OpenAI Whisper), `large-v1` and `large-v2`. Please note that `large` and `large-v2` are the same model.
+
+For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models.
+
+
+### Configuring the `Model Path`
+
+```sh
+export ASR_MODEL_PATH=/data/whisper
+```
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..14969d2
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,7 @@
+Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multitask model that can perform multilingual speech recognition as well as speech translation and language identification.
+
+## Features
+Current release (v1.2.0) supports following whisper models:
+
+- [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918)
+- [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0)
diff --git a/docs/licence.md b/docs/licence.md
new file mode 100644
index 0000000..becad7a
--- /dev/null
+++ b/docs/licence.md
@@ -0,0 +1,5 @@
+# Licence
+
+```
+--8<-- "LICENCE"
+```
diff --git a/docs/run.md b/docs/run.md
new file mode 100644
index 0000000..1e63fed
--- /dev/null
+++ b/docs/run.md
@@ -0,0 +1,53 @@
+## Usage
+
+Whisper ASR Webservice is now available on Docker Hub. You can find the latest version of this repository on docker hub for CPU and GPU.
+
+Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice>
+
+=== ":octicons-file-code-16: `CPU`"
+
+    ```sh
+    docker pull onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
+    ```
+
+=== ":octicons-file-code-16: `CPU (macOS)`"
+
+    > GPU passthrough does not work on macOS due to fundamental design limitations of Docker. Docker actually runs containers within a LinuxVM on macOS. If you wish to run GPU-accelerated containers, I'm afraid Linux is your only option.
+    > 
+    > The `:latest` image tag provides both amd64 and arm64 architectures:
+    
+    ```sh
+    docker pull onerahmet/openai-whisper-asr-webservice:latest
+    docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest
+    ```
+
+=== ":octicons-file-code-16: `GPU`"
+
+    ```sh
+    docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu
+    docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu
+    ```
+
+> Interactive Swagger API documentation is available at http://localhost:9000/docs
+
+![Swagger UI](assets/images/swagger-ui.png)
+
+## Cache
+The ASR model is downloaded each time you start the container. When using the large model, this can take some time.
+If you want to decrease the time it takes to start your container by skipping the download, you can store the cache directory (`~/.cache/whisper`) to a persistent storage. 
+Next time you start your container the ASR Model will be taken from the cache instead of being downloaded again.
+
+**Important: this will prevent you from receiving any updates to the models.**
+
+=== ":octicons-file-code-16: `Default cache dir`"
+
+    ```sh
+    docker run -d -p 9000:9000 -v $PWD/yourlocaldir:~/.cache/whisper onerahmet/openai-whisper-asr-webservice:latest
+    ```
+
+=== ":octicons-file-code-16: `With ASR_MODEL_PATH`"
+
+    ```sh
+    docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v $PWD/yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest
+    ```
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..5bcc8a1
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,87 @@
+site_name: Whisper ASR Webservice
+site_url: https://ahmetoner.github.io/whisper-asr-webservice
+site_dir: public
+
+site_description: "OpenAI Whisper ASR Webservice API"
+repo_url: "https://github.com/ahmetoner/whisper-asr-webservice"
+repo_name: "ahmetoner/whisper-asr-webservice"
+copyright: Copyright &copy; 2023
+edit_uri: edit/main/docs/
+
+validation:
+  omitted_files: warn
+  absolute_links: warn
+  unrecognized_links: warn
+
+nav:
+  - Home:
+      - Whisper ASR Webservice: index.md
+      - Run: run.md
+      - Endpoints: endpoints.md
+      - Environmental Variables: environmental-variables.md
+      - Build: build.md
+      - Changelog: changelog.md
+      - Licence: licence.md
+  - Releases: https://github.com/ahmetoner/whisper-asr-webservice/releases
+  - Docker Hub: https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice
+
+theme:
+  name: material
+  custom_dir: docs/.overrides
+  icon:
+    logo: material/subtitles
+  features:
+    - announce.dismiss
+    - content.action.edit
+    - content.action.view
+    - content.code.annotate
+    - content.code.copy
+    - content.tooltips
+    - navigation.footer
+    - navigation.indexes
+    - navigation.sections
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - navigation.top
+    - search.highlight
+    - search.suggest
+    - toc.follow
+
+extra_css:
+  - assets/css/extra.css
+markdown_extensions:
+  - attr_list
+  - admonition
+  - footnotes
+  - pymdownx.emoji:
+      emoji_index: !!python/name:materialx.emoji.twemoji
+      emoji_generator: !!python/name:materialx.emoji.to_svg
+  - pymdownx.magiclink
+  - pymdownx.snippets:
+      check_paths: true
+      dedent_subsections: true
+  - pymdownx.superfences
+  - pymdownx.tabbed:
+      alternate_style: true
+      slugify: !!python/object/apply:pymdownx.slugs.slugify
+        kwds:
+          case: lower
+  - pymdownx.tasklist:
+      custom_checkbox: true
+  - toc:
+      permalink: "ΒΆ"
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+
+plugins:
+  - search
+
+extra:
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/ahmetoner
+    - icon: fontawesome/brands/docker
+      link: https://hub.docker.com/u/onerahmet
-- 
GitLab