Update app.py
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import subprocess
 import sys
 from pathlib import Path
-
+
 # --- 1. Clone the VibeVoice Repository ---
 repo_dir = "VibeVoice"
 if not os.path.exists(repo_dir):
@@ -23,17 +23,6 @@ else:
 os.chdir(repo_dir)
 print(f"Changed directory to: {os.getcwd()}")
 
-print("Installing bitsandbytes for potential quantization...")
-try:
-    subprocess.run(
-        [sys.executable, "-m", "pip", "install", "bitsandbytes"],
-        check=True, capture_output=True, text=True
-    )
-    print("bitsandbytes installed successfully.")
-except subprocess.CalledProcessError as e:
-    print(f"Error installing bitsandbytes: {e.stderr}")
-    sys.exit(1)
-
 print("Installing the VibeVoice package in editable mode...")
 try:
     subprocess.run(
@@ -57,10 +46,9 @@ try:
     modified_content = "import spaces\n" + modified_content
 
     # --- Patch 1: Prevent model loading at startup ---
-    #
-    # This stops the main CPU process from loading the heavyweight model.
+    # Comment out self.load_model() in __init__ to avoid loading on the main CPU process.
     original_init_line = "        self.load_model()"
-    replacement_init_line = "        # self.load_model() # Patched: Defer model loading"
+    replacement_init_line = "        # self.load_model() # Patched: Defer model loading\n        self.model = None\n        self.processor = None"
 
     if original_init_line in modified_content:
         modified_content = modified_content.replace(original_init_line, replacement_init_line)
@@ -70,11 +58,11 @@ try:
         sys.exit(1)
 
     # --- Patch 2: Move model loading inside the generation function and add decorator ---
-    # This ensures the model is loaded "just-in-time" on the GPU worker.
+    # This ensures the model is loaded "just-in-time" on the GPU worker with proper precision.
    original_method_signature = "    def generate_podcast_streaming(self,"
 
     # Define the model loading code to be inserted.
-    # We
+    # We use torch.bfloat16 for a balance of performance and quality.
     lazy_load_code = """
         # Patched: Lazy-load model and processor on the GPU worker
         if self.model is None or self.processor is None:
@@ -82,7 +70,7 @@ try:
             self.processor = VibeVoiceProcessor.from_pretrained(self.model_path)
             self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
                 self.model_path,
-
+                torch_dtype=torch.bfloat16,  # Use 16-bit precision for quality
                 device_map="auto",
             )
             self.model.eval()
@@ -93,39 +81,27 @@ try:
             )
             self.model.set_ddpm_inference_steps(num_steps=self.inference_steps)
             print("Model and processor loaded successfully on GPU worker.")
-
 """
 
-    # We
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        pre_method = modified_content[:method_start_index]
-        method_signature_and_body = modified_content[method_start_index:]
-
-        # Decorate the original signature
-        decorated_signature = "    @spaces.GPU(duration=120)\n" + original_method_signature
-        method_signature_and_body = method_signature_and_body.replace(original_method_signature, decorated_signature)
-
-        # Insert the lazy loading code after the signature line
-        final_method = method_signature_and_body.replace("-> Iterator[tuple]:", "-> Iterator[tuple]:\n" + lazy_load_code, 1)
-
-        modified_content = pre_method + final_method
+    # We need to find the full method signature to insert code into it.
+    full_method_signature_line = None
+    for line in modified_content.splitlines():
+        if "def generate_podcast_streaming" in line:
+            full_method_signature_line = line.strip()
+            break
+
+    if full_method_signature_line:
+        # We find the end of the method signature to insert our code block.
+        target_to_replace = full_method_signature_line + "\n"
+        replacement_block = (
+            "    @spaces.GPU(duration=120)\n" +
+            "    " + full_method_signature_line + "\n" +
+            lazy_load_code
+        )
+        modified_content = modified_content.replace(target_to_replace, replacement_block, 1)
         print("Successfully refactored generation method for lazy loading on GPU.")
     else:
-        print(f"\033[91mError: Could not find '{original_method_signature}' to patch.\033[0m")
+        print(f"\033[91mError: Could not find full method signature for 'generate_podcast_streaming' to patch.\033[0m")
         sys.exit(1)
 
     demo_script_path.write_text(modified_content)
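The decorator itself is added by plain text substitution. A self-contained toy of the technique (the sample source string is invented):

# Decorate a method by replacing its signature text in the source.
source = (
    "class Demo:\n"
    "    def generate_podcast_streaming(self, text) -> str:\n"
    "        return text\n"
)
signature = "    def generate_podcast_streaming(self,"
# Matching the indented signature keeps the decorator aligned with the def.
patched = source.replace(signature, "    @spaces.GPU(duration=120)\n" + signature, 1)
print(patched)

Matching on the indented form, as the pre-change code did with original_method_signature, avoids having to re-indent the replacement by hand.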
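Injecting the lazy-load block uses the same substitution trick, anchored on the end of the method signature; the pre-change code used the return annotation as that anchor, which works even when the signature spans several lines. A runnable toy (sample text invented):

sample = (
    "    def generate_podcast_streaming(self,\n"
    "                                    script: str) -> Iterator[tuple]:\n"
    "        yield from ()\n"
)
body = "        print('lazy-load block goes here')\n"
# Anchoring on the annotation places the insert after the full signature,
# never between parameters.
patched = sample.replace("-> Iterator[tuple]:", "-> Iterator[tuple]:\n" + body, 1)
print(patched)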
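On the dtype choice: bfloat16 halves weight memory relative to float32 while keeping float32's exponent range, and device_map="auto" (which requires the accelerate package) lets the loader place weights on the available GPU. For reference, the generic transformers form of the patched call (model id is a placeholder):

import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "some-org/some-model",       # placeholder model id
    torch_dtype=torch.bfloat16,  # 2 bytes per weight instead of 4
    device_map="auto",           # requires `accelerate`
)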