diff --git a/CHANGELOG.md b/CHANGELOG.md index 677ef3d701b3e20ac4b2e1e06fd169e0fdf6ac2c..90f5592f2fc472f7ea5170e2ec1d6f1a8a07405a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ Changelog Unreleased ---------- +### Changed + +- Upgraded + - [openai/whisper](https://github.com/openai/whisper) to [v20231106](https://github.com/openai/whisper/releases/tag/v20231106) + + [1.2.2] (2023-11-03) -------------------- diff --git a/README.md b/README.md index 46443ddc57531c084de0f80288cb68ce40061a8a..a9521d551fea73099724ba807a4bdec4e2820561 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Whisper is a general-purpose speech recognition model. It is trained on a large ## Features Current release (v1.2.2) supports following whisper models: -- [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918) +- [openai/whisper](https://github.com/openai/whisper)@[v20231106](https://github.com/openai/whisper/releases/tag/v20231106) - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) diff --git a/docs/environmental-variables.md b/docs/environmental-variables.md index cc631b0eb758f5904ff62eeee1c971496fc3f527..c8f7c48cd265a2de787919063b43d65fbf1d401c 100644 --- a/docs/environmental-variables.md +++ b/docs/environmental-variables.md @@ -15,7 +15,7 @@ export ASR_MODEL=base ``` -Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large` (only OpenAI Whisper), `large-v1` and `large-v2`. Please note that `large` and `large-v2` are the same model. +Available ASR_MODELs are `tiny`, `base`, `small`, `medium`, `large` (only OpenAI Whisper), `large-v1`, `large-v2` and `large-v3` (only OpenAI Whisper for now). For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models. diff --git a/docs/index.md b/docs/index.md index 2f2d2fe6abc031a6ee3ea020213e58d229a11fd9..cb5fabfd5aa145a8b6f79c5afaea71d1bdd45c9f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,7 +3,7 @@ Whisper is a general-purpose speech recognition model. It is trained on a large ## Features Current release (v1.2.2) supports following whisper models: -- [openai/whisper](https://github.com/openai/whisper)@[v20230918](https://github.com/openai/whisper/releases/tag/v20230918) +- [openai/whisper](https://github.com/openai/whisper)@[v20231106](https://github.com/openai/whisper/releases/tag/v20231106) - [guillaumekln/faster-whisper](https://github.com/guillaumekln/faster-whisper)@[0.9.0](https://github.com/guillaumekln/faster-whisper/releases/tag/v0.9.0) ## Quick Usage diff --git a/poetry.lock b/poetry.lock index 0b35a4a75e62f9fabc32b6fe24e51da822d6c516..36f19bbf3a709cb49e6ae2735c203b1f6694294b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -757,19 +757,19 @@ sympy = "*" [[package]] name = "openai-whisper" -version = "20230918" +version = "20231106" description = "Robust Speech Recognition via Large-Scale Weak Supervision" optional = false python-versions = ">=3.8" files = [ - {file = "openai-whisper-20230918.tar.gz", hash = "sha256:32a1ee39c3faaf6c719e3a83f1aacc8e164aad87976350371e26845271287c30"}, + {file = "openai-whisper-20231106.tar.gz", hash = "sha256:9d1de7fa1e766b9adf8be4bfa7fb11e2bdf8d2b0bf77b90478cf4d75e0e58d19"}, ] [package.dependencies] more-itertools = "*" numba = "*" numpy = "*" -tiktoken = "0.3.3" +tiktoken = "*" torch = "*" tqdm = "*" @@ -1574,4 +1574,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7ba666ad744fc80fff978862f014431c8a03565c7d1b6a459bc759ace4f73078" +content-hash = "d4773fb3718894fe55d566f19e641fd7bd9be1516788eca099c32cd10c4bd0df" diff --git a/pyproject.toml b/pyproject.toml index 3002a128913bb21e48f855a3da45eafb73286dfb..c2fc44ca0d0e7fe1b0298d96ada59ef8535266a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ ffmpeg-python = "^0.2.0" fastapi = "^0.104.1" llvmlite = "^0.41.1" numba = "^0.58.0" -openai-whisper = "20230918" +openai-whisper = "20231106" faster-whisper = "^0.9.0" torch = [ {markers = "sys_platform == 'darwin' and platform_machine == 'arm64'", url = "https://download.pytorch.org/whl/cpu/torch-1.13.0-cp310-none-macosx_11_0_arm64.whl"},