Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 committed on Oct 2, 2024

Commit

f3ecc7a

unverified ·

2 Parent(s): 2d3dbbe 4fe08f3

Merge pull request #306 from jhj0517/feature/add-tests

Browse files

Files changed (17) hide show

.github/workflows/{shell-scrpit-test.yml → ci-shell.yml} +23 -19
.github/workflows/ci.yml +41 -0
modules/translation/deepl_api.py +26 -26
modules/translation/nllb_inference.py +12 -2
modules/translation/translation_base.py +8 -6
modules/utils/subtitle_manager.py +0 -3
modules/whisper/whisper_base.py +10 -5
modules/whisper/whisper_parameter.py +10 -0
requirements.txt +1 -1
tests/test_bgm_separation.py +53 -0
tests/test_config.py +17 -0
tests/test_diarization.py +31 -0
tests/test_srt.srt +7 -0
tests/test_transcription.py +97 -0
tests/test_translation.py +52 -0
tests/test_vad.py +26 -0
tests/test_vtt.vtt +6 -0

.github/workflows/{shell-scrpit-test.yml → ci-shell.yml} RENAMED Viewed

@@ -1,38 +1,42 @@
-name: Shell Script Test
 on:
   push:
-    branches: ["feature/shell-script"]
-env:
-  PYTHON_VERSION: '3.9'
 jobs:
   test-shell-script:
     runs-on: ubuntu-latest
     steps:
-    - name: 'Checkout GitHub Action'
-      uses: actions/checkout@v3
-    - name: Setup Python ${{ env.PYTHON_VERSION }} Environment
-      uses: actions/setup-python@v4
       with:
-        python-version: ${{ env.PYTHON_VERSION }}
-    - name: 'Setup FFmpeg'
-      uses: FedericoCarboni/setup-ffmpeg@v3
-      id: setup-ffmpeg
-      with:
-        ffmpeg-version: release
-        architecture: 'arm64'
-        linking-type: static
-    - name: 'Execute Install.sh'
       run: |
         chmod +x ./Install.sh
         ./Install.sh
-    - name: 'Execute start-webui.sh'
       run: |
         chmod +x ./start-webui.sh
         timeout 60s ./start-webui.sh || true

+# Smoke-tests Install.sh and start-webui.sh on pushes to master, pull requests
+# targeting master, and manual dispatch.
+name: CI-Shell Script
 on:
+  workflow_dispatch:
   push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
 jobs:
   test-shell-script:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [ "3.10" ]
     steps:
+    # Removes /opt/hostedtoolcache to free disk space on the runner (per the step name).
+    - name: Clean up space for action
+      run: rm -rf /opt/hostedtoolcache
+    - uses: actions/checkout@v4
+    - name: Setup Python
+      uses: actions/setup-python@v5
       with:
+        python-version: ${{ matrix.python }}
+    - name: Install git and ffmpeg
+      run: sudo apt-get update && sudo apt-get install -y git ffmpeg
+    - name: Execute Install.sh
       run: |
         chmod +x ./Install.sh
         ./Install.sh
+    # The server is stopped by `timeout` after 60s; `|| true` keeps that non-zero
+    # exit status from failing the step.
+    - name: Execute start-webui.sh
       run: |
         chmod +x ./start-webui.sh
         timeout 60s ./start-webui.sh || true

.github/workflows/ci.yml ADDED Viewed

	@@ -0,0 +1,41 @@

+# Runs the pytest suite in tests/ on pushes to master, pull requests targeting
+# master, and manual dispatch.
+name: CI
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: ["3.10"]
+    env:
+      # Exported to every step; tests/test_translation.py reads it with os.getenv.
+      DEEPL_API_KEY: ${{ secrets.DEEPL_API_KEY }}
+    steps:
+      # Removes /opt/hostedtoolcache to free disk space on the runner (per the step name).
+      - name: Clean up space for action
+        run: rm -rf /opt/hostedtoolcache
+      - uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install git and ffmpeg
+        run: sudo apt-get update && sudo apt-get install -y git ffmpeg
+      - name: Install dependencies
+        run: pip install -r requirements.txt pytest
+      # -rs adds skipped tests and their reasons to the pytest summary.
+      - name: Run test
+        run: python -m pytest -rs tests

modules/translation/deepl_api.py CHANGED Viewed

@@ -98,8 +98,8 @@ class DeepLAPI:
                         fileobjs: list,
                         source_lang: str,
                         target_lang: str,
-                        is_pro: bool,
-                        add_timestamp: bool,
                         progress=gr.Progress()) -> list:
         """
         Translate subtitle files using DeepL API
@@ -126,6 +126,9 @@ class DeepLAPI:
         String to return to gr.Textbox()
         Files to return to gr.Files()
         """
         self.cache_parameters(
             api_key=auth_key,
             is_pro=is_pro,
@@ -136,37 +139,28 @@ class DeepLAPI:
         files_info = {}
         for fileobj in fileobjs:
-            file_path = fileobj.name
-            file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
             if file_ext == ".srt":
                 parsed_dicts = parse_srt(file_path=file_path)
-                batch_size = self.max_text_batch_size
-                for batch_start in range(0, len(parsed_dicts), batch_size):
-                    batch_end = min(batch_start + batch_size, len(parsed_dicts))
-                    sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
-                    translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
-                                                                    target_lang, is_pro)
-                    for i, translated_text in enumerate(translated_texts):
-                        parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
-                    progress(batch_end / len(parsed_dicts), desc="Translating..")
-                subtitle = get_serialized_srt(parsed_dicts)
             elif file_ext == ".vtt":
                 parsed_dicts = parse_vtt(file_path=file_path)
-                batch_size = self.max_text_batch_size
-                for batch_start in range(0, len(parsed_dicts), batch_size):
-                    batch_end = min(batch_start + batch_size, len(parsed_dicts))
-                    sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
-                    translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
-                                                                    target_lang, is_pro)
-                    for i, translated_text in enumerate(translated_texts):
-                        parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
-                    progress(batch_end / len(parsed_dicts), desc="Translating..")
                 subtitle = get_serialized_vtt(parsed_dicts)
             if add_timestamp:
@@ -193,8 +187,14 @@ class DeepLAPI:
                                 text: list,
                                 source_lang: str,
                                 target_lang: str,
-                                is_pro: bool):
         """Request API response to DeepL server"""
         url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate'
         headers = {

                         fileobjs: list,
                         source_lang: str,
                         target_lang: str,
+                        is_pro: bool = False,
+                        add_timestamp: bool = True,
                         progress=gr.Progress()) -> list:
         """
         Translate subtitle files using DeepL API
         String to return to gr.Textbox()
         Files to return to gr.Files()
         """
+        if fileobjs and isinstance(fileobjs[0], gr.utils.NamedString):
+            fileobjs = [fileobj.name for fileobj in fileobjs]
         self.cache_parameters(
             api_key=auth_key,
             is_pro=is_pro,
         files_info = {}
         for fileobj in fileobjs:
+            file_path = fileobj
+            file_name, file_ext = os.path.splitext(os.path.basename(fileobj))
             if file_ext == ".srt":
                 parsed_dicts = parse_srt(file_path=file_path)
             elif file_ext == ".vtt":
                 parsed_dicts = parse_vtt(file_path=file_path)
+            batch_size = self.max_text_batch_size
+            for batch_start in range(0, len(parsed_dicts), batch_size):
+                batch_end = min(batch_start + batch_size, len(parsed_dicts))
+                sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]]
+                translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang,
+                                                                target_lang, is_pro)
+                for i, translated_text in enumerate(translated_texts):
+                    parsed_dicts[batch_start + i]["sentence"] = translated_text["text"]
+                progress(batch_end / len(parsed_dicts), desc="Translating..")
+            if file_ext == ".srt":
+                subtitle = get_serialized_srt(parsed_dicts)
+            elif file_ext == ".vtt":
                 subtitle = get_serialized_vtt(parsed_dicts)
             if add_timestamp:
                                 text: list,
                                 source_lang: str,
                                 target_lang: str,
+                                is_pro: bool = False):
         """Request API response to DeepL server"""
+        if source_lang not in list(DEEPL_AVAILABLE_SOURCE_LANGS.keys()):
+            raise ValueError(f"Source language {source_lang} is not supported."
+                             f"Use one of {list(DEEPL_AVAILABLE_SOURCE_LANGS.keys())}")
+        if target_lang not in list(DEEPL_AVAILABLE_TARGET_LANGS.keys()):
+            raise ValueError(f"Target language {target_lang} is not supported."
+                             f"Use one of {list(DEEPL_AVAILABLE_TARGET_LANGS.keys())}")
         url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate'
         headers = {

modules/translation/nllb_inference.py CHANGED Viewed

@@ -37,6 +37,17 @@ class NLLBInference(TranslationBase):
                      tgt_lang: str,
                      progress: gr.Progress = gr.Progress()
                      ):
         if model_size != self.current_model_size or self.model is None:
             print("\nInitializing NLLB Model..\n")
             progress(0, desc="Initializing NLLB Model..")
@@ -48,8 +59,7 @@ class NLLBInference(TranslationBase):
             self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_size,
                                                            cache_dir=os.path.join(self.model_dir, "tokenizers"),
                                                            local_files_only=local_files_only)
-        src_lang = NLLB_AVAILABLE_LANGS[src_lang]
-        tgt_lang = NLLB_AVAILABLE_LANGS[tgt_lang]
         self.pipeline = pipeline("translation",
                                  model=self.model,
                                  tokenizer=self.tokenizer,

                      tgt_lang: str,
                      progress: gr.Progress = gr.Progress()
                      ):
+        def validate_language(lang: str) -> str:
+            """
+            Map `lang` to its NLLB_AVAILABLE_LANGS value when it is a key of that dict,
+            return it unchanged when it is already one of the values, and raise
+            ValueError otherwise.
+            """
+            try:
+                return NLLB_AVAILABLE_LANGS[lang]
+            except KeyError:
+                pass
+            if lang in NLLB_AVAILABLE_LANGS.values():
+                return lang
+            raise ValueError(
+                f"Language '{lang}' is not supported. Use one of: {list(NLLB_AVAILABLE_LANGS.keys())}")
+        src_lang = validate_language(src_lang)
+        tgt_lang = validate_language(tgt_lang)
         if model_size != self.current_model_size or self.model is None:
             print("\nInitializing NLLB Model..\n")
             progress(0, desc="Initializing NLLB Model..")
             self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_size,
                                                            cache_dir=os.path.join(self.model_dir, "tokenizers"),
                                                            local_files_only=local_files_only)
         self.pipeline = pipeline("translation",
                                  model=self.model,
                                  tokenizer=self.tokenizer,

modules/translation/translation_base.py CHANGED Viewed

@@ -46,8 +46,8 @@ class TranslationBase(ABC):
                        model_size: str,
                        src_lang: str,
                        tgt_lang: str,
-                       max_length: int,
-                       add_timestamp: bool,
                        progress=gr.Progress()) -> list:
         """
         Translate subtitle file from source language to target language
@@ -77,6 +77,9 @@ class TranslationBase(ABC):
         Files to return to gr.Files()
         """
         try:
             self.cache_parameters(model_size=model_size,
                                   src_lang=src_lang,
                                   tgt_lang=tgt_lang,
@@ -90,10 +93,9 @@ class TranslationBase(ABC):
             files_info = {}
             for fileobj in fileobjs:
-                file_path = fileobj.name
-                file_name, file_ext = os.path.splitext(os.path.basename(fileobj.name))
                 if file_ext == ".srt":
-                    parsed_dicts = parse_srt(file_path=file_path)
                     total_progress = len(parsed_dicts)
                     for index, dic in enumerate(parsed_dicts):
                         progress(index / total_progress, desc="Translating..")
@@ -102,7 +104,7 @@ class TranslationBase(ABC):
                     subtitle = get_serialized_srt(parsed_dicts)
                 elif file_ext == ".vtt":
-                    parsed_dicts = parse_vtt(file_path=file_path)
                     total_progress = len(parsed_dicts)
                     for index, dic in enumerate(parsed_dicts):
                         progress(index / total_progress, desc="Translating..")

                        model_size: str,
                        src_lang: str,
                        tgt_lang: str,
+                       max_length: int = 200,
+                       add_timestamp: bool = True,
                        progress=gr.Progress()) -> list:
         """
         Translate subtitle file from source language to target language
         Files to return to gr.Files()
         """
         try:
+            if fileobjs and isinstance(fileobjs[0], gr.utils.NamedString):
+                fileobjs = [file.name for file in fileobjs]
             self.cache_parameters(model_size=model_size,
                                   src_lang=src_lang,
                                   tgt_lang=tgt_lang,
             files_info = {}
             for fileobj in fileobjs:
+                file_name, file_ext = os.path.splitext(os.path.basename(fileobj))
                 if file_ext == ".srt":
+                    parsed_dicts = parse_srt(file_path=fileobj)
                     total_progress = len(parsed_dicts)
                     for index, dic in enumerate(parsed_dicts):
                         progress(index / total_progress, desc="Translating..")
                     subtitle = get_serialized_srt(parsed_dicts)
                 elif file_ext == ".vtt":
+                    parsed_dicts = parse_vtt(file_path=fileobj)
                     total_progress = len(parsed_dicts)
                     for index, dic in enumerate(parsed_dicts):
                         progress(index / total_progress, desc="Translating..")

modules/utils/subtitle_manager.py CHANGED Viewed

@@ -119,11 +119,8 @@ def get_serialized_vtt(dicts):
 def safe_filename(name):
-    from app import _args
     INVALID_FILENAME_CHARS = r'[<>:"/\\|?*\x00-\x1f]'
     safe_name = re.sub(INVALID_FILENAME_CHARS, '_', name)
-    if not _args.colab:
-        return safe_name
     # Truncate the filename if it exceeds the max_length (20)
     if len(safe_name) > 20:
         file_extension = safe_name.split('.')[-1]

 def safe_filename(name):
     INVALID_FILENAME_CHARS = r'[<>:"/\\|?*\x00-\x1f]'
     safe_name = re.sub(INVALID_FILENAME_CHARS, '_', name)
     # Truncate the filename if it exceeds the max_length (20)
     if len(safe_name) > 20:
         file_extension = safe_name.split('.')[-1]

modules/whisper/whisper_base.py CHANGED Viewed

@@ -104,7 +104,9 @@ class WhisperBase(ABC):
             add_timestamp=add_timestamp
         )
-        if params.lang == "Automatic Detection":
             params.lang = None
         else:
             language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
@@ -133,7 +135,7 @@ class WhisperBase(ABC):
         if params.vad_filter:
             # Explicit value set for float('inf') from gr.Number()
-            if params.max_speech_duration_s >= 9999:
                 params.max_speech_duration_s = float('inf')
             vad_options = VadOptions(
@@ -208,18 +210,21 @@ class WhisperBase(ABC):
         try:
             if input_folder_path:
                 files = get_media_files(input_folder_path)
-                files = format_gradio_files(files)
             files_info = {}
             for file in files:
                 transcribed_segments, time_for_task = self.run(
-                    file.name,
                     progress,
                     add_timestamp,
                     *whisper_params,
                 )
-                file_name, file_ext = os.path.splitext(os.path.basename(file.name))
                 subtitle, file_path = self.generate_and_write_file(
                     file_name=file_name,
                     transcribed_segments=transcribed_segments,

             add_timestamp=add_timestamp
         )
+        if params.lang is None:
+            pass
+        elif params.lang == "Automatic Detection":
             params.lang = None
         else:
             language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
         if params.vad_filter:
             # Explicit value set for float('inf') from gr.Number()
+            if params.max_speech_duration_s is None or params.max_speech_duration_s >= 9999:
                 params.max_speech_duration_s = float('inf')
             vad_options = VadOptions(
         try:
             if input_folder_path:
                 files = get_media_files(input_folder_path)
+            if isinstance(files, str):
+                files = [files]
+            if files and isinstance(files[0], gr.utils.NamedString):
+                files = [file.name for file in files]
             files_info = {}
             for file in files:
                 transcribed_segments, time_for_task = self.run(
+                    file,
                     progress,
                     add_timestamp,
                     *whisper_params,
                 )
+                file_name, file_ext = os.path.splitext(os.path.basename(file))
                 subtitle, file_path = self.generate_and_write_file(
                     file_name=file_name,
                     transcribed_segments=transcribed_segments,

modules/whisper/whisper_parameter.py CHANGED Viewed

@@ -357,3 +357,13 @@ class WhisperValues:
             },
         }
         return data

             },
         }
         return data
+    def as_list(self) -> list:
+        """
+        Collects the value of every dataclass field of this instance
+        Returns
+        ----------
+        A list with one entry per field, in the order reported by fields()
+        """
+        values = []
+        for field_info in fields(self):
+            values.append(getattr(self, field_info.name))
+        return values

requirements.txt CHANGED Viewed

@@ -12,6 +12,6 @@ transformers==4.42.3
 gradio==4.43.0
 pytubefix
 ruamel.yaml==0.18.6
-pyannote.audio==3.3.1
 git+https://github.com/jhj0517/ultimatevocalremover_api.git
 git+https://github.com/jhj0517/pyrubberband.git

 gradio==4.43.0
 pytubefix
 ruamel.yaml==0.18.6
+pyannote.audio==3.3.1;
 git+https://github.com/jhj0517/ultimatevocalremover_api.git
 git+https://github.com/jhj0517/pyrubberband.git

tests/test_bgm_separation.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from modules.utils.paths import *
+from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperValues
+from test_config import *
+from test_transcription import download_file, test_transcribe
+import gradio as gr
+import pytest
+import torch
+import os
+@pytest.mark.skipif(
+    not is_cuda_available(),
+    reason="Skipping because the test only works on GPU"
+)
+@pytest.mark.parametrize(
+    "whisper_type,vad_filter,bgm_separation,diarization",
+    [
+        (backend, False, True, False)
+        for backend in ("whisper", "faster-whisper", "insanely_fast_whisper")
+    ]
+)
+def test_bgm_separation_pipeline(
+    whisper_type: str,
+    vad_filter: bool,
+    bgm_separation: bool,
+    diarization: bool,
+):
+    """Run the shared transcription flow with only BGM separation enabled; skipped without CUDA."""
+    # NOTE(review): the module-level import of test_transcribe probably makes pytest
+    # collect it in this module as well - confirm and alias the import if so.
+    test_transcribe(
+        whisper_type=whisper_type,
+        vad_filter=vad_filter,
+        bgm_separation=bgm_separation,
+        diarization=diarization,
+    )
+@pytest.mark.skipif(
+    not is_cuda_available(),
+    reason="Skipping because the test only works on GPU"
+)
+@pytest.mark.parametrize(
+    "whisper_type,vad_filter,bgm_separation,diarization",
+    [
+        (backend, True, True, False)
+        for backend in ("whisper", "faster-whisper", "insanely_fast_whisper")
+    ]
+)
+def test_bgm_separation_with_vad_pipeline(
+    whisper_type: str,
+    vad_filter: bool,
+    bgm_separation: bool,
+    diarization: bool,
+):
+    """Run the shared transcription flow with VAD and BGM separation enabled; skipped without CUDA."""
+    # NOTE(review): the module-level import of test_transcribe probably makes pytest
+    # collect it in this module as well - confirm and alias the import if so.
+    test_transcribe(
+        whisper_type=whisper_type,
+        vad_filter=vad_filter,
+        bgm_separation=bgm_separation,
+        diarization=diarization,
+    )

tests/test_config.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from modules.utils.paths import *
+import os
+import torch
+TEST_FILE_DOWNLOAD_URL = "https://github.com/jhj0517/whisper_flutter_new/raw/main/example/assets/jfk.wav"
+TEST_FILE_PATH = os.path.join(WEBUI_DIR, "tests", "jfk.wav")
+TEST_YOUTUBE_URL = "https://www.youtube.com/watch?v=4WEQtgnBu0I&ab_channel=AndriaFitzer"
+TEST_WHISPER_MODEL = "tiny"
+TEST_UVR_MODEL = "UVR-MDX-NET-Inst_HQ_4"
+TEST_NLLB_MODEL = "facebook/nllb-200-distilled-600M"
+TEST_SUBTITLE_SRT_PATH = os.path.join(WEBUI_DIR, "tests", "test_srt.srt")
+TEST_SUBTITLE_VTT_PATH = os.path.join(WEBUI_DIR, "tests", "test_vtt.vtt")
+def is_cuda_available():
+    """Return whatever torch.cuda.is_available() reports."""
+    cuda_ready = torch.cuda.is_available()
+    return cuda_ready

tests/test_diarization.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from modules.utils.paths import *
+from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperValues
+from test_config import *
+from test_transcription import download_file, test_transcribe
+import gradio as gr
+import pytest
+import os
+@pytest.mark.skipif(
+    not is_cuda_available(),
+    reason="Skipping because the test only works on GPU"
+)
+@pytest.mark.parametrize(
+    "whisper_type,vad_filter,bgm_separation,diarization",
+    [
+        (backend, False, False, True)
+        for backend in ("whisper", "faster-whisper", "insanely_fast_whisper")
+    ]
+)
+def test_diarization_pipeline(
+    whisper_type: str,
+    vad_filter: bool,
+    bgm_separation: bool,
+    diarization: bool,
+):
+    """Run the shared transcription flow with only diarization enabled; skipped without CUDA."""
+    # NOTE(review): the module-level import of test_transcribe probably makes pytest
+    # collect it in this module as well - confirm and alias the import if so.
+    test_transcribe(
+        whisper_type=whisper_type,
+        vad_filter=vad_filter,
+        bgm_separation=bgm_separation,
+        diarization=diarization,
+    )

tests/test_srt.srt ADDED Viewed

	@@ -0,0 +1,7 @@

+1
+00:00:00,000 --> 00:00:02,240
+You've got
+2
+00:00:02,240 --> 00:00:04,160
+a friend in me.

tests/test_transcription.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperValues
+from modules.utils.paths import WEBUI_DIR
+from test_config import *
+import requests
+import pytest
+import gradio as gr
+import os
+def _assert_transcription_output(result):
+    """Check that a transcribe_* call returned a non-empty subtitle string and at least one output path."""
+    # Fail with a readable message instead of an unpacking TypeError when nothing came back.
+    assert result is not None, "transcription returned no result"
+    subtitle_str, file_path = result
+    assert isinstance(subtitle_str, str) and subtitle_str
+    # Test for emptiness before indexing; file_path[0] is a str both for a list of
+    # paths and for a single path string.
+    assert file_path and isinstance(file_path[0], str)
+@pytest.mark.parametrize(
+    "whisper_type,vad_filter,bgm_separation,diarization",
+    [
+        ("whisper", False, False, False),
+        ("faster-whisper", False, False, False),
+        ("insanely_fast_whisper", False, False, False)
+    ]
+)
+def test_transcribe(
+    whisper_type: str,
+    vad_filter: bool,
+    bgm_separation: bool,
+    diarization: bool,
+):
+    """
+    Transcribe the sample audio through the file, YouTube and mic entry points
+    of the inferencer created for `whisper_type`, and validate every result.
+
+    The sample file is downloaded into the tests directory when it is missing.
+    The other test modules call this function directly with their own flag
+    combinations, so the signature must stay positional-compatible.
+    """
+    audio_path_dir = os.path.join(WEBUI_DIR, "tests")
+    audio_path = os.path.join(audio_path_dir, "jfk.wav")
+    if not os.path.exists(audio_path):
+        download_file(TEST_FILE_DOWNLOAD_URL, audio_path_dir)
+    whisper_inferencer = WhisperFactory.create_whisper_inference(
+        whisper_type=whisper_type,
+    )
+    print(
+        f"""Whisper Device : {whisper_inferencer.device}\n"""
+        f"""BGM Separation Device: {whisper_inferencer.music_separator.device}\n"""
+        f"""Diarization Device: {whisper_inferencer.diarizer.device}"""
+    )
+    hparams = WhisperValues(
+        model_size=TEST_WHISPER_MODEL,
+        vad_filter=vad_filter,
+        is_bgm_separate=bgm_separation,
+        compute_type=whisper_inferencer.current_compute_type,
+        uvr_enable_offload=True,
+        is_diarize=diarization,
+    ).as_list()
+    _assert_transcription_output(whisper_inferencer.transcribe_file(
+        [audio_path],
+        None,
+        "SRT",
+        False,
+        gr.Progress(),
+        *hparams,
+    ))
+    # Each entry point's own return value is checked; previously the YouTube and mic
+    # results were discarded and the file-transcription values were asserted again.
+    _assert_transcription_output(whisper_inferencer.transcribe_youtube(
+        TEST_YOUTUBE_URL,
+        "SRT",
+        False,
+        gr.Progress(),
+        *hparams,
+    ))
+    _assert_transcription_output(whisper_inferencer.transcribe_mic(
+        audio_path,
+        "SRT",
+        False,
+        gr.Progress(),
+        *hparams,
+    ))
+def download_file(url, save_dir):
+    """
+    Download `url` into `save_dir`, naming the file after the last URL path segment.
+
+    Nothing is downloaded when the destination file already exists.
+
+    Raises
+    ----------
+    requests.HTTPError
+        If the server answers with an error status.
+    requests.Timeout
+        If the server does not respond within the timeout.
+    """
+    file_name = url.split("/")[-1]
+    file_path = os.path.join(save_dir, file_name)
+    # Check the real destination rather than a fixed constant, so the function
+    # behaves correctly for any url / save_dir pair.
+    if os.path.exists(file_path):
+        return
+    os.makedirs(save_dir, exist_ok=True)
+    # Without a timeout a stalled connection would hang the test run indefinitely.
+    response = requests.get(url, timeout=60)
+    # Never persist an error page as the fixture: it would be picked up as a valid
+    # file on later runs because of the existence check above.
+    response.raise_for_status()
+    with open(file_path, "wb") as file:
+        file.write(response.content)
+    print(f"File downloaded to: {file_path}")

tests/test_translation.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from modules.translation.deepl_api import DeepLAPI
+from modules.translation.nllb_inference import NLLBInference
+from test_config import *
+import os
+import pytest
+@pytest.mark.parametrize("model_size, file_path", [
+    (TEST_NLLB_MODEL, subtitle_path)
+    for subtitle_path in (TEST_SUBTITLE_SRT_PATH, TEST_SUBTITLE_VTT_PATH)
+])
+def test_nllb_inference(
+    model_size: str,
+    file_path: str
+):
+    """Translate a sample subtitle file with NLLB and check the types of the returned values."""
+    translator = NLLBInference()
+    print(f"NLLB Device : {translator.device}")
+    outputs = translator.translate_file(
+        fileobjs=[file_path],
+        model_size=model_size,
+        src_lang="eng_Latn",
+        tgt_lang="kor_Hang",
+    )
+    summary, written_paths = outputs
+    assert isinstance(summary, str)
+    assert isinstance(written_paths[0], str)
+@pytest.mark.skipif(
+    not os.getenv("DEEPL_API_KEY"),
+    reason="Skipping because DEEPL_API_KEY is not set"
+)
+@pytest.mark.parametrize("file_path", [
+    TEST_SUBTITLE_SRT_PATH,
+    TEST_SUBTITLE_VTT_PATH,
+])
+def test_deepl_api(
+    file_path: str
+):
+    """
+    Translate a sample subtitle file through the DeepL API and check the types
+    of the returned values.
+
+    Skipped when DEEPL_API_KEY is missing or empty, since the request cannot be
+    authenticated without it (e.g. CI runs where the repository secret is not
+    exposed).
+    """
+    deepl_api = DeepLAPI()
+    api_key = os.getenv("DEEPL_API_KEY")
+    result_str, file_paths = deepl_api.translate_deepl(
+        auth_key=api_key,
+        fileobjs=[file_path],
+        source_lang="English",
+        target_lang="Korean",
+        is_pro=False,
+        add_timestamp=True,
+    )
+    assert isinstance(result_str, str)
+    assert isinstance(file_paths[0], str)

tests/test_vad.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from modules.utils.paths import *
+from modules.whisper.whisper_factory import WhisperFactory
+from modules.whisper.whisper_parameter import WhisperValues
+from test_config import *
+from test_transcription import download_file, test_transcribe
+import gradio as gr
+import pytest
+import os
+@pytest.mark.parametrize(
+    "whisper_type,vad_filter,bgm_separation,diarization",
+    [
+        (backend, True, False, False)
+        for backend in ("whisper", "faster-whisper", "insanely_fast_whisper")
+    ]
+)
+def test_vad_pipeline(
+    whisper_type: str,
+    vad_filter: bool,
+    bgm_separation: bool,
+    diarization: bool,
+):
+    """Run the shared transcription flow with only the VAD filter enabled."""
+    # NOTE(review): the module-level import of test_transcribe probably makes pytest
+    # collect it in this module as well - confirm and alias the import if so.
+    test_transcribe(
+        whisper_type=whisper_type,
+        vad_filter=vad_filter,
+        bgm_separation=bgm_separation,
+        diarization=diarization,
+    )

tests/test_vtt.vtt ADDED Viewed

	@@ -0,0 +1,6 @@

+WEBVTT
+00:00:00.500 --> 00:00:02.000
+You've got
+00:00:02.500 --> 00:00:04.300
+a friend in me.