From 8db9450c4d16b4c808d5f06f73310f09eb3e3af6 Mon Sep 17 00:00:00 2001 From: Ahmet Oner <ahmetn8@gmail.com> Date: Sun, 1 Oct 2023 22:53:55 +0200 Subject: [PATCH] Release 1.2.0 --- CHANGELOG.md | 28 +++++++++++++++++----------- Dockerfile | 2 +- Dockerfile.gpu | 4 ++-- README.md | 6 +++--- poetry.lock | 38 -------------------------------------- pyproject.toml | 2 +- 6 files changed, 24 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b8bcb4..b8c5666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,21 +4,25 @@ Changelog Unreleased ---------- -### Updated - -- Updated model conversion method (for Faster Whisper) to use Hugging Face downloader -- Updated default model paths to `~/.cache/whisper`. - - For customization, modify the `ASR_MODEL_PATH` environment variable. - - Ensure Docker volume is set for the corresponding directory to use caching. - ```bash - docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v ./yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest - ``` +[1.2.0] (2023-10-01) +-------------------- ### Changed - Upgraded - - [openai/whisper](https://github.com/openai/whisper) to [v20230918](https://github.com/openai/whisper/releases/tag/v20230918) - - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper) to [v0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) + - [openai/whisper](https://github.com/openai/whisper) to [v20230918](https://github.com/openai/whisper/releases/tag/v20230918) + - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper) to [v0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) + +### Updated + +- Updated model conversion method (for Faster Whisper) to use Hugging Face downloader +- Updated default model paths to `~/.cache/whisper`. + - For customization, modify the `ASR_MODEL_PATH` environment variable. + - Ensure Docker volume is set for the corresponding directory to use caching. + ```bash + docker run -d -p 9000:9000 -e ASR_MODEL_PATH=/data/whisper -v ./yourlocaldir:/data/whisper onerahmet/openai-whisper-asr-webservice:latest + ``` +- Removed the `triton` dependency from `poetry.lock` to ensure the stability of the pipeline for `ARM-based` Docker images [1.1.1] (2023-05-29) -------------------- @@ -109,6 +113,8 @@ Unreleased - mp3 support by using ffmpeg instead of librosa in #8 - add language detection endpoint in #9 +[1.2.0]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.2.0 + [1.1.1]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.1.1 [1.1.0]: https://github.com/ahmetoner/whisper-asr-webservice/releases/tag/v1.1.0 diff --git a/Dockerfile b/Dockerfile index 95bfdd4..42d638c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ RUN python3 -m venv $POETRY_VENV \ && $POETRY_VENV/bin/pip install -U pip setuptools \ - && $POETRY_VENV/bin/pip install poetry==1.4.0 + && $POETRY_VENV/bin/pip install poetry==1.6.1 ENV PATH="${PATH}:${POETRY_VENV}/bin" diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 817eecf..c3a8eee 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,5 +1,5 @@ FROM swaggerapi/swagger-ui:v4.18.2 AS swagger-ui -FROM nvidia/cuda:11.7.0-base-ubuntu22.04 +FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 ENV PYTHON_VERSION=3.10 ENV POETRY_VENV=/app/.venv @@ -19,7 +19,7 @@ RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \ RUN python3 -m venv $POETRY_VENV \ && $POETRY_VENV/bin/pip install -U pip setuptools \ - && $POETRY_VENV/bin/pip install poetry==1.4.0 + && $POETRY_VENV/bin/pip install poetry==1.6.1 ENV PATH="${PATH}:${POETRY_VENV}/bin" diff --git a/README.md b/README.md index c714fe5..2198f63 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@  # Whisper ASR Webservice -Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification. For more details: [github.com/openai/whisper](https://github.com/openai/whisper/) +Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multitask model that can perform multilingual speech recognition as well as speech translation and language identification. For more details: [github.com/openai/whisper](https://github.com/openai/whisper/) ## Features -Current release (v1.1.1) supports following whisper models: +Current release (v1.2.0) supports following whisper models: - [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918) - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) @@ -32,7 +32,7 @@ docker pull onerahmet/openai-whisper-asr-webservice:latest-gpu docker run -d --gpus all -p 9000:9000 -e ASR_MODEL=base -e ASR_ENGINE=openai_whisper onerahmet/openai-whisper-asr-webservice:latest-gpu ``` -For MacOS (CPU only): +For macOS (CPU only): GPU passthrough does not work on macOS due to fundamental design limitations of Docker. Docker actually runs containers within a LinuxVM on macOS. If you wish to run GPU-accelerated containers, I'm afraid Linux is your only option. diff --git a/poetry.lock b/poetry.lock index 274851c..95b91ab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -770,7 +770,6 @@ numpy = "*" tiktoken = "0.3.3" torch = "*" tqdm = "*" -triton = "2.0.0" [package.extras] dev = ["black", "flake8", "isort", "pytest", "scipy"] @@ -1386,43 +1385,6 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] -[[package]] -name = "triton" -version = "2.0.0" -description = "A language and compiler for custom Deep Learning operations" -optional = false -python-versions = "*" -files = [ - {file = "triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38806ee9663f4b0f7cd64790e96c579374089e58f49aac4a6608121aa55e2505"}, - {file = "triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:226941c7b8595219ddef59a1fdb821e8c744289a132415ddd584facedeb475b1"}, - {file = "triton-2.0.0-1-cp36-cp36m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4c9fc8c89874bc48eb7e7b2107a9b8d2c0bf139778637be5bfccb09191685cfd"}, - {file = "triton-2.0.0-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d2684b6a60b9f174f447f36f933e9a45f31db96cb723723ecd2dcfd1c57b778b"}, - {file = "triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9d4978298b74fcf59a75fe71e535c092b023088933b2f1df933ec32615e4beef"}, - {file = "triton-2.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:74f118c12b437fb2ca25e1a04759173b517582fcf4c7be11913316c764213656"}, - {file = "triton-2.0.0-1-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9618815a8da1d9157514f08f855d9e9ff92e329cd81c0305003eb9ec25cc5add"}, - {file = "triton-2.0.0-1-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1aca3303629cd3136375b82cb9921727f804e47ebee27b2677fef23005c3851a"}, - {file = "triton-2.0.0-1-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e3e13aa8b527c9b642e3a9defcc0fbd8ffbe1c80d8ac8c15a01692478dc64d8a"}, - {file = "triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05a7e64e4ca0565535e3d5d3405d7e49f9d308505bb7773d21fb26a4c008c2"}, - {file = "triton-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4b99ca3c6844066e516658541d876c28a5f6e3a852286bbc97ad57134827fd"}, - {file = "triton-2.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47b4d70dc92fb40af553b4460492c31dc7d3a114a979ffb7a5cdedb7eb546c08"}, - {file = "triton-2.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fedce6a381901b1547e0e7e1f2546e4f65dca6d91e2d8a7305a2d1f5551895be"}, - {file = "triton-2.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75834f27926eab6c7f00ce73aaf1ab5bfb9bec6eb57ab7c0bfc0a23fac803b4c"}, - {file = "triton-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0117722f8c2b579cd429e0bee80f7731ae05f63fe8e9414acd9a679885fcbf42"}, - {file = "triton-2.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcd9be5d0c2e45d2b7e6ddc6da20112b6862d69741576f9c3dbaf941d745ecae"}, - {file = "triton-2.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42a0d2c3fc2eab4ba71384f2e785fbfd47aa41ae05fa58bf12cb31dcbd0aeceb"}, - {file = "triton-2.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c47b72c72693198163ece9d90a721299e4fb3b8e24fd13141e384ad952724f"}, -] - -[package.dependencies] -cmake = "*" -filelock = "*" -lit = "*" -torch = "*" - -[package.extras] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"] -tutorials = ["matplotlib", "pandas", "tabulate"] - [[package]] name = "typing-extensions" version = "4.5.0" diff --git a/pyproject.toml b/pyproject.toml index 0d8909f..a656bd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "whisper-asr-webservice" -version = "1.1.1" +version = "1.2.0" description = "Whisper ASR Webservice is a general-purpose speech recognition webservice." homepage = "https://github.com/ahmetoner/whisper-asr-webservice/" license = "https://github.com/ahmetoner/whisper-asr-webservice/blob/main/LICENCE" -- GitLab