rdune71 committed on
Commit
2a0dea8
·
1 Parent(s): b5d5e39

Fix HF endpoint URL parsing and add scale-to-zero handling

Browse files
app.py CHANGED
@@ -105,18 +105,35 @@ with st.sidebar:
105
  except Exception as e:
106
  st.warning(f"πŸ¦™ Ollama: Status check failed")
107
 
108
- # HF Endpoint Status
 
109
  try:
110
  from services.hf_endpoint_monitor import hf_monitor
111
- hf_status = hf_monitor.get_status_summary()
112
- if "🟒" in hf_status:
113
- st.success(f"πŸ€— HF Endpoint: {hf_status.replace('🟒 ', '')}")
114
- elif "🟑" in hf_status:
115
- st.warning(f"πŸ€— HF Endpoint: {hf_status.replace('🟑 ', '')}")
 
 
 
 
 
 
 
 
 
116
  else:
117
- st.error(f"πŸ€— HF Endpoint: {hf_status.replace('πŸ”΄ ', '')}")
 
 
 
 
 
 
 
118
  except Exception as e:
119
- st.warning("πŸ€— HF Endpoint: Status check failed")
120
 
121
  # Redis Status
122
  redis_healthy = check_redis_health()
 
105
  except Exception as e:
106
  st.warning(f"πŸ¦™ Ollama: Status check failed")
107
 
108
+ # Enhanced HF Status Display
109
+ st.subheader("HF Endpoint Status")
110
  try:
111
  from services.hf_endpoint_monitor import hf_monitor
112
+
113
+ # Show current status
114
+ status_summary = hf_monitor.get_status_summary()
115
+ if "🟒" in status_summary:
116
+ st.success(status_summary)
117
+ elif "🟑" in status_summary:
118
+ st.warning(status_summary)
119
+ if st.button("⚡ Wake Up HF Endpoint"):
120
+ with st.spinner("Waking up HF endpoint... (this may take 2-4 minutes)"):
121
+ success = hf_monitor.handle_scale_to_zero()
122
+ if success:
123
+ st.success("✅ HF endpoint is now awake!")
124
+ else:
125
+ st.error("❌ Failed to wake up HF endpoint. Please try again.")
126
  else:
127
+ st.error(status_summary)
128
+ if st.button("🚀 Initialize HF Endpoint"):
129
+ with st.spinner("Initializing HF endpoint... (this may take 2-4 minutes)"):
130
+ success = hf_monitor.handle_scale_to_zero()
131
+ if success:
132
+ st.success("✅ HF endpoint initialized successfully!")
133
+ else:
134
+ st.error("❌ Failed to initialize HF endpoint.")
135
  except Exception as e:
136
+ st.error(f"❌ HF Monitor Error: {str(e)}")
137
 
138
  # Redis Status
139
  redis_healthy = check_redis_health()
core/__pycache__/coordinator.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/coordinator.cpython-313.pyc and b/core/__pycache__/coordinator.cpython-313.pyc differ
 
services/__pycache__/hf_endpoint_monitor.cpython-313.pyc CHANGED
Binary files a/services/__pycache__/hf_endpoint_monitor.cpython-313.pyc and b/services/__pycache__/hf_endpoint_monitor.cpython-313.pyc differ
 
services/hf_endpoint_monitor.py CHANGED
@@ -10,57 +10,183 @@ class HFEndpointMonitor:
10
  """Monitor Hugging Face endpoint status and health"""
11
 
12
  def __init__(self):
13
- self.endpoint_url = config.hf_api_url
 
 
14
  self.hf_token = config.hf_token
15
  self.is_initialized = False
16
  self.last_check = 0
17
  self.check_interval = 60 # Check every minute
18
- self.warmup_count = 0
19
- self.successful_requests = 0
20
- self.failed_requests = 0
21
- self.avg_response_time = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def check_endpoint_status(self) -> Dict:
24
  """Check if HF endpoint is available and initialized"""
25
  try:
26
- # Check if endpoint exists and is responsive
 
 
 
 
 
 
 
 
 
 
 
 
27
  headers = {"Authorization": f"Bearer {self.hf_token}"}
28
 
29
- # Simple model list check (doesn't trigger initialization)
30
  response = requests.get(
31
- f"{self.endpoint_url}/models",
32
  headers=headers,
33
- timeout=10
34
  )
35
 
36
  status_info = {
37
- 'available': response.status_code == 200,
38
  'status_code': response.status_code,
39
  'initialized': self._is_endpoint_initialized(response),
 
40
  'timestamp': time.time()
41
  }
42
 
 
 
 
43
  logger.info(f"HF Endpoint Status: {status_info}")
44
  return status_info
45
 
46
  except Exception as e:
47
- logger.error(f"HF endpoint check failed: {e}")
 
48
  return {
49
  'available': False,
50
  'status_code': None,
51
  'initialized': False,
52
- 'error': str(e),
53
  'timestamp': time.time()
54
  }
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  def get_detailed_status(self) -> Dict:
57
  """Get detailed HF endpoint status with metrics"""
58
  try:
59
  headers = {"Authorization": f"Bearer {self.hf_token}"}
60
 
61
  # Get model info
 
62
  model_response = requests.get(
63
- f"{self.endpoint_url}/models",
64
  headers=headers,
65
  timeout=10
66
  )
@@ -68,8 +194,9 @@ class HFEndpointMonitor:
68
  # Get endpoint info if available
69
  endpoint_info = {}
70
  try:
 
71
  info_response = requests.get(
72
- f"{self.endpoint_url}/info",
73
  headers=headers,
74
  timeout=10
75
  )
@@ -108,63 +235,5 @@ class HFEndpointMonitor:
108
  'average_response_time': getattr(self, 'avg_response_time', 0)
109
  }
110
 
111
- def _is_endpoint_initialized(self, response) -> bool:
112
- """Determine if endpoint is fully initialized"""
113
- # If we get a model list, it's likely initialized
114
- try:
115
- data = response.json()
116
- return 'data' in data or 'models' in data
117
- except:
118
- return False
119
-
120
- def warm_up_endpoint(self) -> bool:
121
- """Send a warm-up request to initialize the endpoint"""
122
- try:
123
- logger.info("Warming up HF endpoint...")
124
- headers = {
125
- "Authorization": f"Bearer {self.hf_token}",
126
- "Content-Type": "application/json"
127
- }
128
-
129
- # Simple test request to trigger initialization
130
- payload = {
131
- "model": "meta-llama/Llama-2-7b-chat-hf", # Adjust as needed
132
- "messages": [{"role": "user", "content": "Hello"}],
133
- "max_tokens": 10
134
- }
135
-
136
- response = requests.post(
137
- f"{self.endpoint_url}/chat/completions",
138
- headers=headers,
139
- json=payload,
140
- timeout=30
141
- )
142
-
143
- success = response.status_code in [200, 201]
144
- if success:
145
- self.is_initialized = True
146
- self.warmup_count += 1
147
- logger.info("βœ… HF endpoint warmed up successfully")
148
- else:
149
- logger.warning(f"⚠️ HF endpoint warm-up response: {response.status_code}")
150
-
151
- return success
152
-
153
- except Exception as e:
154
- logger.error(f"HF endpoint warm-up failed: {e}")
155
- self.failed_requests += 1
156
- return False
157
-
158
- def get_status_summary(self) -> str:
159
- """Get human-readable status summary"""
160
- status = self.check_endpoint_status()
161
- if status['available']:
162
- if status.get('initialized', False):
163
- return "🟒 HF Endpoint: Available and Initialized"
164
- else:
165
- return "🟑 HF Endpoint: Available but Initializing"
166
- else:
167
- return "πŸ”΄ HF Endpoint: Unavailable"
168
-
169
  # Global instance
170
  hf_monitor = HFEndpointMonitor()
 
10
  """Monitor Hugging Face endpoint status and health"""
11
 
12
  def __init__(self):
13
+ # Clean the endpoint URL
14
+ raw_url = config.hf_api_url or ""
15
+ self.endpoint_url = self._clean_endpoint_url(raw_url)
16
  self.hf_token = config.hf_token
17
  self.is_initialized = False
18
  self.last_check = 0
19
  self.check_interval = 60 # Check every minute
20
+ self.warmup_attempts = 0
21
+ self.max_warmup_attempts = 3
22
+
23
+ logger.info(f"Initialized HF Monitor with URL: {self.endpoint_url}")
24
+
25
+ def _clean_endpoint_url(self, url: str) -> str:
26
+ """Clean and validate endpoint URL"""
27
+ if not url:
28
+ return ""
29
+
30
+ # Remove environment variable names if present
31
+ url = url.replace('hf_api_endpoint_url=', '')
32
+ url = url.replace('HF_API_ENDPOINT_URL=', '')
33
+
34
+ # Strip whitespace
35
+ url = url.strip()
36
+
37
+ # Ensure it starts with https://
38
+ if url and not url.startswith(('http://', 'https://')):
39
+ if 'huggingface.cloud' in url:
40
+ url = 'https://' + url
41
+ else:
42
+ url = 'https://' + url
43
+
44
+ # Remove trailing slashes but keep /v1 if present
45
+ if url.endswith('/'):
46
+ url = url.rstrip('/')
47
+
48
+ return url
49
 
50
  def check_endpoint_status(self) -> Dict:
51
  """Check if HF endpoint is available and initialized"""
52
  try:
53
+ if not self.endpoint_url or not self.hf_token:
54
+ return {
55
+ 'available': False,
56
+ 'status_code': None,
57
+ 'initialized': False,
58
+ 'error': 'URL or token not configured',
59
+ 'timestamp': time.time()
60
+ }
61
+
62
+ # Properly construct the models endpoint URL
63
+ models_url = f"{self.endpoint_url.rstrip('/')}/models"
64
+ logger.info(f"Checking HF endpoint at: {models_url}")
65
+
66
  headers = {"Authorization": f"Bearer {self.hf_token}"}
67
 
 
68
  response = requests.get(
69
+ models_url,
70
  headers=headers,
71
+ timeout=15
72
  )
73
 
74
  status_info = {
75
+ 'available': response.status_code in [200, 201],
76
  'status_code': response.status_code,
77
  'initialized': self._is_endpoint_initialized(response),
78
+ 'response_time': response.elapsed.total_seconds(),
79
  'timestamp': time.time()
80
  }
81
 
82
+ if response.status_code not in [200, 201]:
83
+ status_info['error'] = f"HTTP {response.status_code}: {response.text[:200]}"
84
+
85
  logger.info(f"HF Endpoint Status: {status_info}")
86
  return status_info
87
 
88
  except Exception as e:
89
+ error_msg = str(e)
90
+ logger.error(f"HF endpoint check failed: {error_msg}")
91
  return {
92
  'available': False,
93
  'status_code': None,
94
  'initialized': False,
95
+ 'error': error_msg,
96
  'timestamp': time.time()
97
  }
98
 
99
+ def _is_endpoint_initialized(self, response) -> bool:
100
+ """Determine if endpoint is fully initialized"""
101
+ try:
102
+ data = response.json()
103
+ return 'data' in data or 'models' in data
104
+ except:
105
+ return response.status_code in [200, 201]
106
+
107
+ def warm_up_endpoint(self) -> bool:
108
+ """Send a warm-up request to initialize the endpoint"""
109
+ try:
110
+ if not self.endpoint_url or not self.hf_token:
111
+ logger.warning("Cannot warm up HF endpoint - URL or token not configured")
112
+ return False
113
+
114
+ self.warmup_attempts += 1
115
+ logger.info(f"Warming up HF endpoint (attempt {self.warmup_attempts})...")
116
+
117
+ headers = {
118
+ "Authorization": f"Bearer {self.hf_token}",
119
+ "Content-Type": "application/json"
120
+ }
121
+
122
+ # Construct proper chat completions URL
123
+ chat_url = f"{self.endpoint_url.rstrip('/')}/chat/completions"
124
+ logger.info(f"Sending warm-up request to: {chat_url}")
125
+
126
+ payload = {
127
+ "model": "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
128
+ "messages": [{"role": "user", "content": "Hello"}],
129
+ "max_tokens": 10,
130
+ "stream": False
131
+ }
132
+
133
+ response = requests.post(
134
+ chat_url,
135
+ headers=headers,
136
+ json=payload,
137
+ timeout=45 # Longer timeout for cold start
138
+ )
139
+
140
+ success = response.status_code in [200, 201]
141
+ if success:
142
+ self.is_initialized = True
143
+ self.warmup_attempts = 0 # Reset on success
144
+ logger.info("✅ HF endpoint warmed up successfully")
145
+ else:
146
+ logger.warning(f"⚠️ HF endpoint warm-up response: {response.status_code}")
147
+ logger.debug(f"Response body: {response.text[:500]}")
148
+
149
+ return success
150
+
151
+ except Exception as e:
152
+ logger.error(f"HF endpoint warm-up failed: {e}")
153
+ return False
154
+
155
+ def get_status_summary(self) -> str:
156
+ """Get human-readable status summary"""
157
+ status = self.check_endpoint_status()
158
+ if status['available']:
159
+ if status.get('initialized', False):
160
+ return "🟒 HF Endpoint: Available and Initialized"
161
+ else:
162
+ return "🟑 HF Endpoint: Available but Initializing"
163
+ else:
164
+ return "πŸ”΄ HF Endpoint: Unavailable"
165
+
166
+ def handle_scale_to_zero(self) -> bool:
167
+ """Handle scale-to-zero behavior with user feedback"""
168
+ logger.info("HF endpoint appears to be scaled to zero. Attempting to wake it up...")
169
+
170
+ # Try to warm up the endpoint
171
+ for attempt in range(self.max_warmup_attempts):
172
+ logger.info(f"Wake-up attempt {attempt + 1}/{self.max_warmup_attempts}")
173
+ if self.warm_up_endpoint():
174
+ logger.info("✅ HF endpoint successfully woken up!")
175
+ return True
176
+ time.sleep(10) # Wait between attempts
177
+
178
+ logger.error("❌ Failed to wake up HF endpoint after all attempts")
179
+ return False
180
+
181
  def get_detailed_status(self) -> Dict:
182
  """Get detailed HF endpoint status with metrics"""
183
  try:
184
  headers = {"Authorization": f"Bearer {self.hf_token}"}
185
 
186
  # Get model info
187
+ models_url = f"{self.endpoint_url.rstrip('/')}/models"
188
  model_response = requests.get(
189
+ models_url,
190
  headers=headers,
191
  timeout=10
192
  )
 
194
  # Get endpoint info if available
195
  endpoint_info = {}
196
  try:
197
+ info_url = f"{self.endpoint_url.rstrip('/')}/info"
198
  info_response = requests.get(
199
+ info_url,
200
  headers=headers,
201
  timeout=10
202
  )
 
235
  'average_response_time': getattr(self, 'avg_response_time', 0)
236
  }
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # Global instance
239
  hf_monitor = HFEndpointMonitor()
test_hf_url_fix.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ # Add project root to path
5
+ project_root = Path(__file__).parent
6
+ sys.path.append(str(project_root))
7
+
8
+ from utils.config import config
9
+ from services.hf_endpoint_monitor import hf_monitor
10
+
11
+ def test_hf_url_fix():
12
+ """Test the HF endpoint URL fix"""
13
+ print("=== HF Endpoint URL Fix Test ===")
14
+ print()
15
+
16
+ # Test configuration parsing
17
+ print("1. Testing Configuration Parsing:")
18
+ print(f" Raw HF_API_ENDPOINT_URL: {config.hf_api_url}")
19
+ print(f" Parsed endpoint URL: {getattr(hf_monitor, 'endpoint_url', 'Not initialized')}")
20
+ print()
21
+
22
+ # Test URL cleaning
23
+ print("2. Testing URL Cleaning:")
24
+ test_urls = [
25
+ "https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
26
+ "hf_api_endpoint_url=https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
27
+ "HF_API_ENDPOINT_URL=https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
28
+ "zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/"
29
+ ]
30
+
31
+ for url in test_urls:
32
+ cleaned = hf_monitor._clean_endpoint_url(url) if hasattr(hf_monitor, '_clean_endpoint_url') else "Method not available"
33
+ print(f" Input: {url}")
34
+ print(f" Output: {cleaned}")
35
+ print()
36
+
37
+ # Test HF endpoint status (if token is available)
38
+ print("3. Testing HF Endpoint Status:")
39
+ try:
40
+ status = hf_monitor.check_endpoint_status()
41
+ print(f" Available: {status.get('available', 'Unknown')}")
42
+ print(f" Status Code: {status.get('status_code', 'Unknown')}")
43
+ print(f" Initialized: {status.get('initialized', 'Unknown')}")
44
+ if 'error' in status:
45
+ print(f" Error: {status['error']}")
46
+ except Exception as e:
47
+ print(f" Error checking status: {e}")
48
+
49
+ print()
50
+ print("🎉 HF Endpoint URL Fix Test Completed!")
51
+
52
+ if __name__ == "__main__":
53
+ test_hf_url_fix()
utils/config.py CHANGED
@@ -39,16 +39,29 @@ class Config:
39
  # Remove any trailing URL encoding artifacts
40
  url = url.rstrip('%0a').rstrip('%0d')
41
 
42
- # Validate URL format - if it looks like a URL but missing scheme
43
- if url and not re.match(r'^https?://', url):
 
 
 
 
 
 
 
 
 
44
  # Check if it looks like a domain
45
- if re.match(r'^[a-zA-Z0-9.-]+(?:\.[a-zA-Z]{2,})+', url) or 'ngrok' in url:
46
- # Assume https for ngrok and domains
47
  url = 'https://' + url
48
  else:
49
- # Otherwise default to http
50
- url = 'http://' + url
51
-
 
 
 
 
 
52
  return url
53
 
54
  def _sanitize_host(self, host: str) -> str:
 
39
  # Remove any trailing URL encoding artifacts
40
  url = url.rstrip('%0a').rstrip('%0d')
41
 
42
+ # Handle the specific case where environment variable name is included
43
+ if 'hf_api_endpoint_url=' in url.lower():
44
+ # Extract the actual URL part
45
+ url = url.split('=')[-1]
46
+
47
+ # Remove any protocol prefix issues
48
+ url = url.replace('hf_api_endpoint_url=', '')
49
+ url = url.replace('HF_API_ENDPOINT_URL=', '')
50
+
51
+ # Ensure proper URL format
52
+ if url and not url.startswith(('http://', 'https://')):
53
  # Check if it looks like a domain
54
+ if re.match(r'^[a-zA-Z0-9.-]+(?:\.[a-zA-Z]{2,})+', url) or 'huggingface.cloud' in url:
 
55
  url = 'https://' + url
56
  else:
57
+ url = 'https://' + url
58
+
59
+ # Remove double slashes (but keep ://)
60
+ if '://' in url:
61
+ protocol, rest = url.split('://', 1)
62
+ rest = re.sub(r'//+', '/', rest)
63
+ url = protocol + '://' + rest
64
+
65
  return url
66
 
67
  def _sanitize_host(self, host: str) -> str: