From 7bf22dd672853523cb160c4b650f2ea8996d7d30 Mon Sep 17 00:00:00 2001 From: Ahmet Oner <me@ahmetoner.com> Date: Mon, 2 Oct 2023 01:20:19 +0200 Subject: [PATCH] Add MkDocs --- .github/workflows/documentation.yml | 26 +++ .gitignore | 4 +- README.md | 184 +-------------------- docs/.overrides/main.html | 12 ++ docs/assets/css/extra.css | 5 + docs/assets/{img => images}/swagger-ui.png | Bin docs/build.md | 80 +++++++++ docs/changelog.md | 1 + docs/endpoints.md | 37 +++++ docs/environmental-variables.md | 27 +++ docs/index.md | 7 + docs/licence.md | 5 + docs/run.md | 53 ++++++ mkdocs.yml | 87 ++++++++++ 14 files changed, 345 insertions(+), 183 deletions(-) create mode 100644 .github/workflows/documentation.yml create mode 100644 docs/.overrides/main.html create mode 100644 docs/assets/css/extra.css rename docs/assets/{img => images}/swagger-ui.png (100%) create mode 100644 docs/build.md create mode 100644 docs/changelog.md create mode 100644 docs/endpoints.md create mode 100644 docs/environmental-variables.md create mode 100644 docs/index.md create mode 100644 docs/licence.md create mode 100644 docs/run.md create mode 100644 mkdocs.yml diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..fee42de --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,26 @@ +name: CI +on: + push: + branches: + - main + - docs +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + if: github.event.repository.fork == false + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v3 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material pymdown-extensions + - run: mkdocs gh-deploy --force diff --git a/.gitignore b/.gitignore index 2e77e32..4dbf939 100644 --- a/.gitignore +++ 
b/.gitignore @@ -39,4 +39,6 @@ MANIFEST.in pip-wheel-metadata /poetry.toml -poetry/core/* \ No newline at end of file +poetry/core/* + +public \ No newline at end of file diff --git a/README.md b/README.md index 2198f63..3b171b7 100644 --- a/README.md +++ b/README.md @@ -12,185 +12,5 @@ Current release (v1.2.0) supports following whisper models: - [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918) - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) -## Usage - -Whisper ASR Webservice now available on Docker Hub. You can find the latest version of this repository on docker hub for CPU and GPU. - -Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice> - -For CPU: - -```sh -docker pull onerahmet/openai-whisper-asr-webservice:latest -docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest -``` - -For GPU: - -```sh -docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu -docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu -``` - -For macOS (CPU only): - -GPU passthrough does not work on macOS due to fundamental design limitations of Docker. Docker actually runs containers within a LinuxVM on macOS. If you wish to run GPU-accelerated containers, I'm afraid Linux is your only option. - -The `:latest` image tag provides both amd64 and arm64 architectures: - -```sh -docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest -``` - -```sh -# Interactive Swagger API documentation is available at http://localhost:9000/docs -``` - - -Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large`, `large-v1` and `large-v2`. 
Please note that `large` and `large-v2` are the same model. - -For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models. - -## Run (Development Environment) - -Install poetry with following command: - -```sh -pip3 install poetry -``` - -Install torch with following command: - -```sh -# just for GPU: -pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch -``` - -Install packages: - -```sh -poetry install -``` - -Starting the Webservice: - -```sh -poetry run gunicorn --bind 0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker -``` - -With docker compose: - -For CPU: -```sh -docker-compose up --build -``` - -For GPU: -```sh -docker-compose up --build -f docker-compose.gpu.yml -``` - -## Quick start - -After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs) - -There are 2 endpoints available: - -- /asr (TXT, VTT, SRT, TSV, JSON) -- /detect-language - -## Automatic Speech recognition service /asr - -If you choose the **transcribe** task, transcribes the uploaded file. Both audio and video files are supported (as long as ffmpeg supports it). - -Note that you can also upload video formats directly as long as they are supported by ffmpeg. - -You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint. - -You can provide the language or it will be automatically recognized. - -If you choose the **translate** task it will provide an English transcript no matter which language was spoken. - -You can enable word level timestamps output by `word_timestamps` parameter (only with `Faster Whisper` for now). - -Returns a json with following fields: - -- **text**: Contains the full transcript -- **segments**: Contains an entry per segment. 
Each entry provides `timestamps`, `transcript`, `token ids`, `word level timestamps` and other metadata -- **language**: Detected or provided language (as a language code) - -## Language detection service /detect-language - -Detects the language spoken in the uploaded file. For longer files it only processes first 30 seconds. - -Returns a json with following fields: - -- **detected_language** -- **language_code** - -## Build - -Build .whl package - -```sh -poetry build -``` - -Configuring the ASR Engine - -```sh -export ASR_ENGINE=openai_whisper -``` -or -```sh -export ASR_ENGINE=faster_whisper -``` - -Configuring the Model - -```sh -export ASR_MODEL=base -``` - -## Docker Build - -### For CPU - -```sh -# Build Image -docker build -t whisper-asr-webservice . - -# Run Container -docker run -d -p 9000:9000 whisper-asr-webservice -# or -docker run -d -p 9001:9000 -e ASR_MODEL=base whisper-asr-webservice3 -``` - -### For GPU - -```sh -# Build Image -docker build -f Dockerfile.gpu -t whisper-asr-webservice-gpu . - -# Run Container -docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-gpu -# or -docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-gpu -``` - -## Cache -The ASR model is downloaded each time you start the container, using the large model this can take some time. -If you want to decrease the time it takes to start your container by skipping the download, you can store the cache directory (`~/.cache/whisper`) to a persistent storage. -Next time you start your container the ASR Model will be taken from the cache instead of being downloaded again. 
- -**Important this will prevent you from receiving any updates to the models.** - -```sh -docker run -d -p 9000:9000 -v ./yourlocaldir:~/.cache/whisper onerahmet/openai-whisper-asr-webservice:latest -``` - -or - -```sh -docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v ./yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest -``` +# Documentation +Explore the documentation by clicking [here](https://ahmetoner.github.io/whisper-asr-webservice). diff --git a/docs/.overrides/main.html b/docs/.overrides/main.html new file mode 100644 index 0000000..fec6239 --- /dev/null +++ b/docs/.overrides/main.html @@ -0,0 +1,12 @@ +{% extends "base.html" %} + +{% block announce %} + +For updates follow <strong>@ahmetoner</strong> on +<a rel="me" href="https://github.com/ahmetoner"> + <span class="twemoji github"> + {% include ".icons/fontawesome/brands/github.svg" %} + </span> + <strong>GitHub</strong> +</a> +{% endblock %} \ No newline at end of file diff --git a/docs/assets/css/extra.css b/docs/assets/css/extra.css new file mode 100644 index 0000000..8b14872 --- /dev/null +++ b/docs/assets/css/extra.css @@ -0,0 +1,5 @@ +:root { + --md-primary-fg-color: #3d6178; + --md-primary-fg-color--light: #3d6178; + --md-primary-fg-color--dark: #3d6178; +} diff --git a/docs/assets/img/swagger-ui.png b/docs/assets/images/swagger-ui.png similarity index 100% rename from docs/assets/img/swagger-ui.png rename to docs/assets/images/swagger-ui.png diff --git a/docs/build.md b/docs/build.md new file mode 100644 index 0000000..01f97a5 --- /dev/null +++ b/docs/build.md @@ -0,0 +1,80 @@ +## Development Environment + +Install poetry with following command: + +```sh +pip3 install poetry +``` + +Install torch with following command: + +```sh +# just for GPU: +pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch +``` + +### Run + +Install packages: + +```sh +poetry install +``` + +Starting the Webservice: + +```sh +poetry run gunicorn --bind 
0.0.0.0:9000 --workers 1 --timeout 0 app.webservice:app -k uvicorn.workers.UvicornWorker +``` + +### Build + +=== ":octicons-file-code-16: `Poetry`" + + Build .whl package + + ```sh + poetry build + ``` +=== ":octicons-file-code-16: `Docker`" + + With `Dockerfile`: + + === ":octicons-file-code-16: `CPU`" + + ```sh + # Build Image + docker build -t whisper-asr-webservice . + + # Run Container + docker run -d -p 9000:9000 whisper-asr-webservice + # or + docker run -d -p 9001:9000 -e ASR_MODEL=base whisper-asr-webservice + ``` + + === ":octicons-file-code-16: `GPU`" + + ```sh + # Build Image + docker build -f Dockerfile.gpu -t whisper-asr-webservice-gpu . + + # Run Container + docker run -d --gpus all -p 9000:9000 whisper-asr-webservice-gpu + # or + docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base whisper-asr-webservice-gpu + ``` + + With `docker-compose`: + + === ":octicons-file-code-16: `CPU`" + + ```sh + docker-compose up --build + ``` + + === ":octicons-file-code-16: `GPU`" + + ```sh + docker-compose -f docker-compose.gpu.yml up --build + ``` + diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..786b75d --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1 @@ +--8<-- "CHANGELOG.md" diff --git a/docs/endpoints.md b/docs/endpoints.md new file mode 100644 index 0000000..ccd22b0 --- /dev/null +++ b/docs/endpoints.md @@ -0,0 +1,37 @@ +## Quick start + +After running the docker image interactive Swagger API documentation is available at [localhost:9000/docs](http://localhost:9000/docs) + +There are 2 endpoints available: + +- /asr (TXT, VTT, SRT, TSV, JSON) +- /detect-language + +## Automatic Speech recognition service /asr + +If you choose the **transcribe** task, transcribes the uploaded file. Both audio and video files are supported (as long as ffmpeg supports it). + +Note that you can also upload video formats directly as long as they are supported by ffmpeg. 
+ +You can get TXT, VTT, SRT, TSV and JSON output as a file from /asr endpoint. + +You can provide the language or it will be automatically recognized. + +If you choose the **translate** task it will provide an English transcript no matter which language was spoken. + +You can enable word level timestamps output by `word_timestamps` parameter (only with `Faster Whisper` for now). + +Returns a json with following fields: + +- **text**: Contains the full transcript +- **segments**: Contains an entry per segment. Each entry provides `timestamps`, `transcript`, `token ids`, `word level timestamps` and other metadata +- **language**: Detected or provided language (as a language code) + +## Language detection service /detect-language + +Detects the language spoken in the uploaded file. For longer files it only processes first 30 seconds. + +Returns a json with following fields: + +- **detected_language** +- **language_code** \ No newline at end of file diff --git a/docs/environmental-variables.md b/docs/environmental-variables.md new file mode 100644 index 0000000..cc631b0 --- /dev/null +++ b/docs/environmental-variables.md @@ -0,0 +1,27 @@ +### Configuring the `Engine` + +=== ":octicons-file-code-16: `openai_whisper`" + ```sh + export ASR_ENGINE=openai_whisper + ``` +=== ":octicons-file-code-16: `faster_whisper`" + ```sh + export ASR_ENGINE=faster_whisper + ``` + +### Configuring the `Model` + +```sh +export ASR_MODEL=base +``` + +Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large` (only OpenAI Whisper), `large-v1` and `large-v2`. Please note that `large` and `large-v2` are the same model. + +For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models. 
+ + +### Configuring the `Model Path` + +```sh +export ASR_MODEL_PATH=/data/whisper +``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..14969d2 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,7 @@ +Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multitask model that can perform multilingual speech recognition as well as speech translation and language identification. + +## Features +Current release (v1.2.0) supports following whisper models: + +- [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918) +- [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) diff --git a/docs/licence.md b/docs/licence.md new file mode 100644 index 0000000..becad7a --- /dev/null +++ b/docs/licence.md @@ -0,0 +1,5 @@ +# Licence + +``` +--8<-- "LICENCE" +``` diff --git a/docs/run.md b/docs/run.md new file mode 100644 index 0000000..1e63fed --- /dev/null +++ b/docs/run.md @@ -0,0 +1,53 @@ +## Usage + +Whisper ASR Webservice now available on Docker Hub. You can find the latest version of this repository on docker hub for CPU and GPU. + +Docker Hub: <https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice> + +=== ":octicons-file-code-16: `CPU`" + + ```sh + docker pull onerahmet/openai-whisper-asr-webservice:latest + docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest + ``` + +=== ":octicons-file-code-16: `CPU (macOS)`" + + > GPU passthrough does not work on macOS due to fundamental design limitations of Docker. Docker actually runs containers within a LinuxVM on macOS. If you wish to run GPU-accelerated containers, I'm afraid Linux is your only option. 
+ > + > The `:latest` image tag provides both amd64 and arm64 architectures: + + ```sh + docker pull onerahmet/openai-whisper-asr-webservice:latest + docker run -d -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest + ``` + +=== ":octicons-file-code-16: `GPU`" + + ```sh + docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu + docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu + ``` + +> Interactive Swagger API documentation is available at http://localhost:9000/docs + + + +## Cache +The ASR model is downloaded each time you start the container, using the large model this can take some time. +If you want to decrease the time it takes to start your container by skipping the download, you can store the cache directory (`~/.cache/whisper`) to a persistent storage. +Next time you start your container the ASR Model will be taken from the cache instead of being downloaded again. 
+ +**Important: this will prevent you from receiving any updates to the models.** + +=== ":octicons-file-code-16: `Default cache dir`" + + ```sh + docker run -d -p 9000:9000 -v $PWD/yourlocaldir:/root/.cache/whisper onerahmet/openai-whisper-asr-webservice:latest + ``` + +=== ":octicons-file-code-16: `With ASR_MODEL_PATH`" + + ```sh + docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v $PWD/yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest + ``` diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..5bcc8a1 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,87 @@ +site_name: Whisper ASR Webservice +site_url: https://ahmetoner.github.io/whisper-asr-webservice +site_dir: public + +site_description: "OpenAI Whisper ASR Webservice API" +repo_url: "https://github.com/ahmetoner/whisper-asr-webservice" +repo_name: "ahmetoner/whisper-asr-webservice" +copyright: Copyright © 2023 +edit_uri: edit/main/docs/ + +validation: + omitted_files: warn + absolute_links: warn + unrecognized_links: warn + +nav: + - Home: + - Whisper ASR Webservice: index.md + - Run: run.md + - Endpoints: endpoints.md + - Environmental Variables: environmental-variables.md + - Build: build.md + - Changelog: changelog.md + - Licence: licence.md + - Releases: https://github.com/ahmetoner/whisper-asr-webservice/releases + - Docker Hub: https://hub.docker.com/r/onerahmet/openai-whisper-asr-webservice + +theme: + name: material + custom_dir: docs/.overrides + icon: + logo: material/subtitles + features: + - announce.dismiss + - content.action.edit + - content.action.view + - content.code.annotate + - content.code.copy + - content.tooltips + - navigation.footer + - navigation.indexes + - navigation.sections + - navigation.tabs + - navigation.tabs.sticky + - navigation.top + - search.highlight + - search.suggest + - toc.follow + +extra_css: + - assets/css/extra.css +markdown_extensions: + - attr_list + - admonition + - footnotes + - pymdownx.emoji: + emoji_index: 
!!python/name:materialx.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg + - pymdownx.magiclink + - pymdownx.snippets: + check_paths: true + dedent_subsections: true + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + - pymdownx.tasklist: + custom_checkbox: true + - toc: + permalink: "ΒΆ" + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + +plugins: + - search + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/ahmetoner + - icon: fontawesome/brands/docker + link: https://hub.docker.com/u/onerahmet -- GitLab