VibeVoice-demo-dev

Paused

App Files Community

broadfield-dev committed on Aug 25

Commit

8a1f431

verified ·

1 Parent(s): 7dc8d63

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -31

app.py CHANGED Viewed

@@ -26,12 +26,12 @@ if not os.path.exists(repo_dir):
 else:
     print("Repository already exists. Skipping clone.")
-# --- 2. Install Dependencies ---
 os.chdir(repo_dir)
 print(f"Changed directory to: {os.getcwd()}")
-# Install the main package
-print("Installing the VibeVoice package...")
 try:
     subprocess.run(
         [sys.executable, "-m", "pip", "install", "-e", "."],
@@ -44,30 +44,15 @@ except subprocess.CalledProcessError as e:
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
-# Install 'spaces' if using ZeroGPU, as it's required for the decorator
-if USE_ZEROGPU:
-    print("Installing the 'spaces' library for ZeroGPU...")
-    try:
-        subprocess.run(
-            [sys.executable, "-m", "pip", "install", "spaces"],
-            check=True,
-            capture_output=True,
-            text=True
-        )
-        print("'spaces' library installed successfully.")
-    except subprocess.CalledProcessError as e:
-        print(f"Error installing 'spaces' library: {e.stderr}")
-        sys.exit(1)
-# --- 3. Modify the demo script based on the toggle ---
 demo_script_path = Path("demo/gradio_demo.py")
-print(f"Reading {demo_script_path}...")
 try:
     file_content = demo_script_path.read_text()
-    # Define the original GPU-specific model loading block we want to replace
-    # This block is problematic because it hardcodes FlashAttention
     original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
@@ -78,18 +63,18 @@ try:
     if USE_ZEROGPU:
         print("Optimizing for ZeroGPU execution...")
-        # New block for ZeroGPU: We remove the problematic flash_attention line.
-        # Transformers will automatically use the best available attention mechanism.
         replacement_block_gpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
         )"""
-        # Add 'import spaces' at the beginning of the file
         modified_content = "import spaces\n" + file_content
-        # Decorate the main class with @spaces.GPU to request a GPU
         modified_content = modified_content.replace(
             "class VibeVoiceGradioInterface:",
             "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
@@ -102,19 +87,18 @@ try:
     else: # Pure CPU execution
         print("Modifying for pure CPU execution...")
-        # New block for CPU: Use float32 and map directly to CPU.
-        # FlashAttention is not compatible with CPU.
         replacement_block_cpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.float32,  # Use float32 for CPU
             device_map="cpu",
         )"""
-        # Replace the model loading block
         modified_content = file_content.replace(original_block, replacement_block_cpu)
         print("Script modified for CPU successfully.")
-    # Write the modified content back to the file
     demo_script_path.write_text(modified_content)
 except Exception as e:
@@ -124,7 +108,7 @@ except Exception as e:
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
-# Construct the command as specified in the README
 command = [
     "python",
     str(demo_script_path),

 else:
     print("Repository already exists. Skipping clone.")
+# --- 2. Install the VibeVoice Package ---
+# Note: Other dependencies are installed via requirements.txt
 os.chdir(repo_dir)
 print(f"Changed directory to: {os.getcwd()}")
+print("Installing the VibeVoice package in editable mode...")
 try:
     subprocess.run(
         [sys.executable, "-m", "pip", "install", "-e", "."],
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
+# --- 3. Modify the demo script to be environment-aware ---
 demo_script_path = Path("demo/gradio_demo.py")
+print(f"Reading {demo_script_path} to apply environment-specific modifications...")
 try:
     file_content = demo_script_path.read_text()
+    # Define the original model loading block that we need to replace.
+    # This block is problematic because it hardcodes FlashAttention.
     original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
     if USE_ZEROGPU:
         print("Optimizing for ZeroGPU execution...")
+        # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
+        # `transformers` will automatically use the best available attention mechanism.
         replacement_block_gpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
         )"""
+        # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
         modified_content = "import spaces\n" + file_content
+        # Decorate the main interface class to request a GPU from the Spaces infrastructure
         modified_content = modified_content.replace(
             "class VibeVoiceGradioInterface:",
             "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
     else: # Pure CPU execution
         print("Modifying for pure CPU execution...")
+        # New block for CPU: Use float32 and map directly to the CPU.
         replacement_block_cpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.float32,  # Use float32 for CPU
             device_map="cpu",
         )"""
+        # Replace the original model loading block with the CPU version
         modified_content = file_content.replace(original_block, replacement_block_cpu)
         print("Script modified for CPU successfully.")
+    # Write the dynamically modified content back to the demo file
     demo_script_path.write_text(modified_content)
 except Exception as e:
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
+# Construct the command to run the modified demo script
 command = [
     "python",
     str(demo_script_path),