broadfield-dev committed on
Commit
138e306
·
verified ·
1 Parent(s): 2bfcd03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -47
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import subprocess
3
  import sys
4
  from pathlib import Path
5
- error
6
  # --- 1. Clone the VibeVoice Repository ---
7
  repo_dir = "VibeVoice"
8
  if not os.path.exists(repo_dir):
@@ -23,17 +23,6 @@ else:
23
  os.chdir(repo_dir)
24
  print(f"Changed directory to: {os.getcwd()}")
25
 
26
- print("Installing bitsandbytes for potential quantization...")
27
- try:
28
- subprocess.run(
29
- [sys.executable, "-m", "pip", "install", "bitsandbytes"],
30
- check=True, capture_output=True, text=True
31
- )
32
- print("bitsandbytes installed successfully.")
33
- except subprocess.CalledProcessError as e:
34
- print(f"Error installing bitsandbytes: {e.stderr}")
35
- sys.exit(1)
36
-
37
  print("Installing the VibeVoice package in editable mode...")
38
  try:
39
  subprocess.run(
@@ -57,10 +46,9 @@ try:
57
  modified_content = "import spaces\n" + modified_content
58
 
59
  # --- Patch 1: Prevent model loading at startup ---
60
- # We comment out the self.load_model() call in the __init__ method.
61
- # This stops the main CPU process from loading the heavyweight model.
62
  original_init_line = " self.load_model()"
63
- replacement_init_line = " # self.load_model() # Patched: Defer model loading to the GPU worker\n self.model = None\n self.processor = None"
64
 
65
  if original_init_line in modified_content:
66
  modified_content = modified_content.replace(original_init_line, replacement_init_line)
@@ -70,11 +58,11 @@ try:
70
  sys.exit(1)
71
 
72
  # --- Patch 2: Move model loading inside the generation function and add decorator ---
73
- # This ensures the model is loaded "just-in-time" on the GPU worker.
74
  original_method_signature = " def generate_podcast_streaming(self,"
75
 
76
  # Define the model loading code to be inserted.
77
- # We will use 8-bit quantization to be safe with memory.
78
  lazy_load_code = """
79
  # Patched: Lazy-load model and processor on the GPU worker
80
  if self.model is None or self.processor is None:
@@ -82,7 +70,7 @@ try:
82
  self.processor = VibeVoiceProcessor.from_pretrained(self.model_path)
83
  self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
84
  self.model_path,
85
- load_in_8bit=True,
86
  device_map="auto",
87
  )
88
  self.model.eval()
@@ -93,39 +81,27 @@ try:
93
  )
94
  self.model.set_ddpm_inference_steps(num_steps=self.inference_steps)
95
  print("Model and processor loaded successfully on GPU worker.")
96
-
97
  """
98
 
99
- # We add the decorator and the lazy loading code.
100
- replacement_block = (
101
- " @spaces.GPU(duration=120)\n" +
102
- original_method_signature +
103
- "\n" +
104
- " " * 8 + lazy_load_code.strip().replace("\n", "\n" + " " * 8)
105
- )
106
-
107
- if original_method_signature in modified_content:
108
- # Find the start of the method and insert our block right after the signature.
109
- # We need to find the full method signature to insert code into it.
110
- method_start_index = modified_content.find(original_method_signature)
111
- # Find the end of the signature line
112
- signature_end_index = modified_content.find("-> Iterator[tuple]:", method_start_index) + len("-> Iterator[tuple]:")
113
-
114
- # Reconstruct the content
115
- pre_method = modified_content[:method_start_index]
116
- method_signature_and_body = modified_content[method_start_index:]
117
-
118
- # Decorate the original signature
119
- decorated_signature = " @spaces.GPU(duration=120)\n" + original_method_signature
120
- method_signature_and_body = method_signature_and_body.replace(original_method_signature, decorated_signature)
121
-
122
- # Insert the lazy loading code after the signature line
123
- final_method = method_signature_and_body.replace("-> Iterator[tuple]:", "-> Iterator[tuple]:\n" + lazy_load_code, 1)
124
-
125
- modified_content = pre_method + final_method
126
  print("Successfully refactored generation method for lazy loading on GPU.")
127
  else:
128
- print(f"\033[91mError: Could not find '{original_method_signature}' to patch.\033[0m")
129
  sys.exit(1)
130
 
131
  demo_script_path.write_text(modified_content)
 
2
  import subprocess
3
  import sys
4
  from pathlib import Path
5
+
6
  # --- 1. Clone the VibeVoice Repository ---
7
  repo_dir = "VibeVoice"
8
  if not os.path.exists(repo_dir):
 
23
  os.chdir(repo_dir)
24
  print(f"Changed directory to: {os.getcwd()}")
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  print("Installing the VibeVoice package in editable mode...")
27
  try:
28
  subprocess.run(
 
46
  modified_content = "import spaces\n" + modified_content
47
 
48
  # --- Patch 1: Prevent model loading at startup ---
49
+ # Comment out self.load_model() in __init__ to avoid loading on the main CPU process.
 
50
  original_init_line = " self.load_model()"
51
+ replacement_init_line = " # self.load_model() # Patched: Defer model loading\n self.model = None\n self.processor = None"
52
 
53
  if original_init_line in modified_content:
54
  modified_content = modified_content.replace(original_init_line, replacement_init_line)
 
58
  sys.exit(1)
59
 
60
  # --- Patch 2: Move model loading inside the generation function and add decorator ---
61
+ # This ensures the model is loaded "just-in-time" on the GPU worker with proper precision.
62
  original_method_signature = " def generate_podcast_streaming(self,"
63
 
64
  # Define the model loading code to be inserted.
65
+ # We use torch.bfloat16 for a balance of performance and quality.
66
  lazy_load_code = """
67
  # Patched: Lazy-load model and processor on the GPU worker
68
  if self.model is None or self.processor is None:
 
70
  self.processor = VibeVoiceProcessor.from_pretrained(self.model_path)
71
  self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
72
  self.model_path,
73
+ torch_dtype=torch.bfloat16, # Use 16-bit precision for quality
74
  device_map="auto",
75
  )
76
  self.model.eval()
 
81
  )
82
  self.model.set_ddpm_inference_steps(num_steps=self.inference_steps)
83
  print("Model and processor loaded successfully on GPU worker.")
 
84
  """
85
 
86
+ # We need to find the full method signature to insert code into it.
87
+ full_method_signature_line = None
88
+ for line in modified_content.splitlines():
89
+ if "def generate_podcast_streaming" in line:
90
+ full_method_signature_line = line.strip()
91
+ break
92
+
93
+ if full_method_signature_line:
94
+ # We find the end of the method signature to insert our code block.
95
+ target_to_replace = full_method_signature_line + "\n"
96
+ replacement_block = (
97
+ " @spaces.GPU(duration=120)\n" +
98
+ " " + full_method_signature_line + "\n" +
99
+ lazy_load_code
100
+ )
101
+ modified_content = modified_content.replace(target_to_replace, replacement_block, 1)
 
 
 
 
 
 
 
 
 
 
 
102
  print("Successfully refactored generation method for lazy loading on GPU.")
103
  else:
104
+ print(f"\033[91mError: Could not find full method signature for 'generate_podcast_streaming' to patch.\033[0m")
105
  sys.exit(1)
106
 
107
  demo_script_path.write_text(modified_content)