feat: server side request timing
Browse files- app/api/v1/endpoints.py +12 -2
- app/middleware/request_middleware.py +16 -14
- app/models/schemas.py +18 -2
- tests/curl_commands.md +16 -1
- tests/simple_test.py +24 -8
- tests/test_performance_timing.py +273 -0
app/api/v1/endpoints.py
CHANGED
|
@@ -160,6 +160,7 @@ async def translate_endpoint(
|
|
| 160 |
Returns translated text with source language, inference time, and request tracking.
|
| 161 |
"""
|
| 162 |
request_id = request.state.request_id
|
|
|
|
| 163 |
|
| 164 |
# Validate text length
|
| 165 |
if len(translation_request.text) > settings.max_text_length:
|
|
@@ -214,6 +215,9 @@ async def translate_endpoint(
|
|
| 214 |
inference_time=inference_time
|
| 215 |
)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
| 217 |
return TranslationResponse(
|
| 218 |
translated_text=translated_text,
|
| 219 |
source_language=source_lang,
|
|
@@ -221,7 +225,8 @@ async def translate_endpoint(
|
|
| 221 |
inference_time=inference_time,
|
| 222 |
character_count=character_count,
|
| 223 |
timestamp=full_date,
|
| 224 |
-
request_id=request_id
|
|
|
|
| 225 |
)
|
| 226 |
|
| 227 |
except Exception as e:
|
|
@@ -272,6 +277,7 @@ async def detect_language_endpoint(
|
|
| 272 |
Response includes FLORES-200 language code, native name, and confidence score.
|
| 273 |
"""
|
| 274 |
request_id = request.state.request_id
|
|
|
|
| 275 |
|
| 276 |
# Validate text length
|
| 277 |
if len(detection_request.text) > 1000:
|
|
@@ -318,6 +324,9 @@ async def detect_language_endpoint(
|
|
| 318 |
character_count=character_count
|
| 319 |
)
|
| 320 |
|
|
|
|
|
|
|
|
|
|
| 321 |
return LanguageDetectionResponse(
|
| 322 |
detected_language=detected_lang_code,
|
| 323 |
language_name=language_name,
|
|
@@ -326,7 +335,8 @@ async def detect_language_endpoint(
|
|
| 326 |
is_english=is_english,
|
| 327 |
character_count=character_count,
|
| 328 |
timestamp=full_date,
|
| 329 |
-
request_id=request_id
|
|
|
|
| 330 |
)
|
| 331 |
|
| 332 |
except Exception as e:
|
|
|
|
| 160 |
Returns translated text with source language, inference time, and request tracking.
|
| 161 |
"""
|
| 162 |
request_id = request.state.request_id
|
| 163 |
+
request_start_time = time.time()
|
| 164 |
|
| 165 |
# Validate text length
|
| 166 |
if len(translation_request.text) > settings.max_text_length:
|
|
|
|
| 215 |
inference_time=inference_time
|
| 216 |
)
|
| 217 |
|
| 218 |
+
# Calculate total request time
|
| 219 |
+
total_request_time = time.time() - request_start_time
|
| 220 |
+
|
| 221 |
return TranslationResponse(
|
| 222 |
translated_text=translated_text,
|
| 223 |
source_language=source_lang,
|
|
|
|
| 225 |
inference_time=inference_time,
|
| 226 |
character_count=character_count,
|
| 227 |
timestamp=full_date,
|
| 228 |
+
request_id=request_id,
|
| 229 |
+
total_time=total_request_time
|
| 230 |
)
|
| 231 |
|
| 232 |
except Exception as e:
|
|
|
|
| 277 |
Response includes FLORES-200 language code, native name, and confidence score.
|
| 278 |
"""
|
| 279 |
request_id = request.state.request_id
|
| 280 |
+
request_start_time = time.time()
|
| 281 |
|
| 282 |
# Validate text length
|
| 283 |
if len(detection_request.text) > 1000:
|
|
|
|
| 324 |
character_count=character_count
|
| 325 |
)
|
| 326 |
|
| 327 |
+
# Calculate total request time
|
| 328 |
+
total_request_time = time.time() - request_start_time
|
| 329 |
+
|
| 330 |
return LanguageDetectionResponse(
|
| 331 |
detected_language=detected_lang_code,
|
| 332 |
language_name=language_name,
|
|
|
|
| 335 |
is_english=is_english,
|
| 336 |
character_count=character_count,
|
| 337 |
timestamp=full_date,
|
| 338 |
+
request_id=request_id,
|
| 339 |
+
total_time=total_request_time
|
| 340 |
)
|
| 341 |
|
| 342 |
except Exception as e:
|
app/middleware/request_middleware.py
CHANGED
|
@@ -15,10 +15,10 @@ async def request_middleware(request: Request, call_next):
|
|
| 15 |
"""Middleware for request tracking, metrics, and logging"""
|
| 16 |
start_time = time.time()
|
| 17 |
request_id = generate_request_id()
|
| 18 |
-
|
| 19 |
# Add request ID to request state
|
| 20 |
request.state.request_id = request_id
|
| 21 |
-
|
| 22 |
# Log request
|
| 23 |
logger.info(
|
| 24 |
"request_started",
|
|
@@ -28,25 +28,25 @@ async def request_middleware(request: Request, call_next):
|
|
| 28 |
client_ip=request.client.host if request.client else "unknown",
|
| 29 |
user_agent=request.headers.get("user-agent", "unknown")
|
| 30 |
)
|
| 31 |
-
|
| 32 |
try:
|
| 33 |
response = await call_next(request)
|
| 34 |
-
|
| 35 |
# Calculate duration
|
| 36 |
duration = time.time() - start_time
|
| 37 |
-
|
| 38 |
# Update metrics
|
| 39 |
REQUEST_COUNT.labels(
|
| 40 |
method=request.method,
|
| 41 |
endpoint=request.url.path,
|
| 42 |
status=response.status_code
|
| 43 |
).inc()
|
| 44 |
-
|
| 45 |
REQUEST_DURATION.labels(
|
| 46 |
method=request.method,
|
| 47 |
endpoint=request.url.path
|
| 48 |
).observe(duration)
|
| 49 |
-
|
| 50 |
# Log response
|
| 51 |
logger.info(
|
| 52 |
"request_completed",
|
|
@@ -54,18 +54,20 @@ async def request_middleware(request: Request, call_next):
|
|
| 54 |
status_code=response.status_code,
|
| 55 |
duration=duration
|
| 56 |
)
|
| 57 |
-
|
| 58 |
-
# Add request ID to response headers
|
| 59 |
response.headers["X-Request-ID"] = request_id
|
| 60 |
-
|
|
|
|
|
|
|
| 61 |
return response
|
| 62 |
-
|
| 63 |
except Exception as e:
|
| 64 |
duration = time.time() - start_time
|
| 65 |
-
|
| 66 |
# Update error metrics
|
| 67 |
ERROR_COUNT.labels(error_type=type(e).__name__).inc()
|
| 68 |
-
|
| 69 |
# Log error
|
| 70 |
logger.error(
|
| 71 |
"request_failed",
|
|
@@ -74,5 +76,5 @@ async def request_middleware(request: Request, call_next):
|
|
| 74 |
error_type=type(e).__name__,
|
| 75 |
duration=duration
|
| 76 |
)
|
| 77 |
-
|
| 78 |
raise
|
|
|
|
| 15 |
"""Middleware for request tracking, metrics, and logging"""
|
| 16 |
start_time = time.time()
|
| 17 |
request_id = generate_request_id()
|
| 18 |
+
|
| 19 |
# Add request ID to request state
|
| 20 |
request.state.request_id = request_id
|
| 21 |
+
|
| 22 |
# Log request
|
| 23 |
logger.info(
|
| 24 |
"request_started",
|
|
|
|
| 28 |
client_ip=request.client.host if request.client else "unknown",
|
| 29 |
user_agent=request.headers.get("user-agent", "unknown")
|
| 30 |
)
|
| 31 |
+
|
| 32 |
try:
|
| 33 |
response = await call_next(request)
|
| 34 |
+
|
| 35 |
# Calculate duration
|
| 36 |
duration = time.time() - start_time
|
| 37 |
+
|
| 38 |
# Update metrics
|
| 39 |
REQUEST_COUNT.labels(
|
| 40 |
method=request.method,
|
| 41 |
endpoint=request.url.path,
|
| 42 |
status=response.status_code
|
| 43 |
).inc()
|
| 44 |
+
|
| 45 |
REQUEST_DURATION.labels(
|
| 46 |
method=request.method,
|
| 47 |
endpoint=request.url.path
|
| 48 |
).observe(duration)
|
| 49 |
+
|
| 50 |
# Log response
|
| 51 |
logger.info(
|
| 52 |
"request_completed",
|
|
|
|
| 54 |
status_code=response.status_code,
|
| 55 |
duration=duration
|
| 56 |
)
|
| 57 |
+
|
| 58 |
+
# Add request ID and timing to response headers
|
| 59 |
response.headers["X-Request-ID"] = request_id
|
| 60 |
+
response.headers["X-Response-Time"] = f"{duration:.3f}s"
|
| 61 |
+
response.headers["X-Response-Time-Ms"] = f"{duration * 1000:.1f}"
|
| 62 |
+
|
| 63 |
return response
|
| 64 |
+
|
| 65 |
except Exception as e:
|
| 66 |
duration = time.time() - start_time
|
| 67 |
+
|
| 68 |
# Update error metrics
|
| 69 |
ERROR_COUNT.labels(error_type=type(e).__name__).inc()
|
| 70 |
+
|
| 71 |
# Log error
|
| 72 |
logger.error(
|
| 73 |
"request_failed",
|
|
|
|
| 76 |
error_type=type(e).__name__,
|
| 77 |
duration=duration
|
| 78 |
)
|
| 79 |
+
|
| 80 |
raise
|
app/models/schemas.py
CHANGED
|
@@ -126,6 +126,13 @@ class TranslationResponse(BaseModel):
|
|
| 126 |
example="550e8400-e29b-41d4-a716-446655440000",
|
| 127 |
title="Request ID"
|
| 128 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
class Config:
|
| 131 |
json_schema_extra = {
|
|
@@ -136,7 +143,8 @@ class TranslationResponse(BaseModel):
|
|
| 136 |
"inference_time": 0.234,
|
| 137 |
"character_count": 17,
|
| 138 |
"timestamp": "Monday | 2024-06-21 | 14:30:25",
|
| 139 |
-
"request_id": "550e8400-e29b-41d4-a716-446655440000"
|
|
|
|
| 140 |
}
|
| 141 |
}
|
| 142 |
|
|
@@ -253,6 +261,13 @@ class LanguageDetectionResponse(BaseModel):
|
|
| 253 |
example="550e8400-e29b-41d4-a716-446655440000",
|
| 254 |
title="Request ID"
|
| 255 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
class Config:
|
| 258 |
json_schema_extra = {
|
|
@@ -264,7 +279,8 @@ class LanguageDetectionResponse(BaseModel):
|
|
| 264 |
"is_english": False,
|
| 265 |
"character_count": 17,
|
| 266 |
"timestamp": "Monday | 2024-06-21 | 14:30:25",
|
| 267 |
-
"request_id": "550e8400-e29b-41d4-a716-446655440000"
|
|
|
|
| 268 |
}
|
| 269 |
}
|
| 270 |
|
|
|
|
| 126 |
example="550e8400-e29b-41d4-a716-446655440000",
|
| 127 |
title="Request ID"
|
| 128 |
)
|
| 129 |
+
total_time: float = Field(
|
| 130 |
+
...,
|
| 131 |
+
description="Total request processing time in seconds",
|
| 132 |
+
example=1.234,
|
| 133 |
+
ge=0,
|
| 134 |
+
title="Total Processing Time (seconds)"
|
| 135 |
+
)
|
| 136 |
|
| 137 |
class Config:
|
| 138 |
json_schema_extra = {
|
|
|
|
| 143 |
"inference_time": 0.234,
|
| 144 |
"character_count": 17,
|
| 145 |
"timestamp": "Monday | 2024-06-21 | 14:30:25",
|
| 146 |
+
"request_id": "550e8400-e29b-41d4-a716-446655440000",
|
| 147 |
+
"total_time": 1.234
|
| 148 |
}
|
| 149 |
}
|
| 150 |
|
|
|
|
| 261 |
example="550e8400-e29b-41d4-a716-446655440000",
|
| 262 |
title="Request ID"
|
| 263 |
)
|
| 264 |
+
total_time: float = Field(
|
| 265 |
+
...,
|
| 266 |
+
description="Total request processing time in seconds",
|
| 267 |
+
example=0.045,
|
| 268 |
+
ge=0,
|
| 269 |
+
title="Total Processing Time (seconds)"
|
| 270 |
+
)
|
| 271 |
|
| 272 |
class Config:
|
| 273 |
json_schema_extra = {
|
|
|
|
| 279 |
"is_english": False,
|
| 280 |
"character_count": 17,
|
| 281 |
"timestamp": "Monday | 2024-06-21 | 14:30:25",
|
| 282 |
+
"request_id": "550e8400-e29b-41d4-a716-446655440000",
|
| 283 |
+
"total_time": 0.045
|
| 284 |
}
|
| 285 |
}
|
| 286 |
|
tests/curl_commands.md
CHANGED
|
@@ -227,11 +227,26 @@ curl -s "$API_URL/languages/popular" | jq '.'
|
|
| 227 |
curl -s "$API_URL/languages" > all_languages.json
|
| 228 |
```
|
| 229 |
|
| 230 |
-
### Check Response Headers
|
| 231 |
```bash
|
| 232 |
curl -I "$API_URL/health"
|
| 233 |
```
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
### Measure Response Time
|
| 236 |
```bash
|
| 237 |
curl -w "@curl-format.txt" -s -o /dev/null "$API_URL/translate" \
|
|
|
|
| 227 |
curl -s "$API_URL/languages" > all_languages.json
|
| 228 |
```
|
| 229 |
|
| 230 |
+
### Check Response Headers (Including Timing)
|
| 231 |
```bash
|
| 232 |
curl -I "$API_URL/health"
|
| 233 |
```
|
| 234 |
|
| 235 |
+
### View Response Headers with Translation
|
| 236 |
+
```bash
|
| 237 |
+
curl -v -X POST "$API_URL/translate" \
|
| 238 |
+
-H "Content-Type: application/json" \
|
| 239 |
+
-d '{"text": "Hello", "target_language": "swh_Latn"}'
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
### Extract Timing Headers Only
|
| 243 |
+
```bash
|
| 244 |
+
curl -s -D - -X POST "$API_URL/translate" \
|
| 245 |
+
-H "Content-Type: application/json" \
|
| 246 |
+
-d '{"text": "Hello", "target_language": "swh_Latn"}' \
|
| 247 |
+
| grep -E "X-Response-Time|X-Request-ID"
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
### Measure Response Time
|
| 251 |
```bash
|
| 252 |
curl -w "@curl-format.txt" -s -o /dev/null "$API_URL/translate" \
|
tests/simple_test.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
"""
|
| 2 |
-
Simple API test script using requests
|
| 3 |
"""
|
| 4 |
|
| 5 |
import requests
|
| 6 |
import json
|
|
|
|
| 7 |
|
| 8 |
# API base URL - change this to test different environments
|
| 9 |
API_URL = "https://sematech-sema-api.hf.space"
|
|
@@ -13,8 +14,12 @@ def test_health():
|
|
| 13 |
"""Test basic health check"""
|
| 14 |
print("[TEST] Health check...")
|
| 15 |
|
|
|
|
| 16 |
response = requests.get(f"{API_URL}/status")
|
|
|
|
|
|
|
| 17 |
print(f"Status: {response.status_code}")
|
|
|
|
| 18 |
|
| 19 |
if response.status_code == 200:
|
| 20 |
data = response.json()
|
|
@@ -36,13 +41,16 @@ def test_translation():
|
|
| 36 |
"target_language": "eng_Latn"
|
| 37 |
}
|
| 38 |
|
|
|
|
| 39 |
response = requests.post(
|
| 40 |
f"{API_URL}/translate",
|
| 41 |
headers={"Content-Type": "application/json"},
|
| 42 |
json=data
|
| 43 |
)
|
|
|
|
| 44 |
|
| 45 |
print(f"Status: {response.status_code}")
|
|
|
|
| 46 |
|
| 47 |
if response.status_code == 200:
|
| 48 |
result = response.json()
|
|
@@ -50,7 +58,9 @@ def test_translation():
|
|
| 50 |
print(f"Original: {data['text']}")
|
| 51 |
print(f"Translation: {result['translated_text']}")
|
| 52 |
print(f"Source language: {result['source_language']}")
|
| 53 |
-
print(f"
|
|
|
|
|
|
|
| 54 |
else:
|
| 55 |
print(f"[FAIL] Translation failed")
|
| 56 |
print(f"Status code: {response.status_code}")
|
|
@@ -67,28 +77,34 @@ def test_languages():
|
|
| 67 |
print("[TEST] Language endpoints...")
|
| 68 |
|
| 69 |
# Test all languages
|
|
|
|
| 70 |
response = requests.get(f"{API_URL}/languages")
|
|
|
|
| 71 |
if response.status_code == 200:
|
| 72 |
data = response.json()
|
| 73 |
-
print(f"[PASS] Found {data['total_count']} supported languages")
|
| 74 |
else:
|
| 75 |
-
print(f"[FAIL] Failed to get languages")
|
| 76 |
|
| 77 |
# Test popular languages
|
|
|
|
| 78 |
response = requests.get(f"{API_URL}/languages/popular")
|
|
|
|
| 79 |
if response.status_code == 200:
|
| 80 |
data = response.json()
|
| 81 |
-
print(f"[PASS] Found {data['total_count']} popular languages")
|
| 82 |
else:
|
| 83 |
-
print(f"[FAIL] Failed to get popular languages")
|
| 84 |
|
| 85 |
# Test specific language
|
|
|
|
| 86 |
response = requests.get(f"{API_URL}/languages/swh_Latn")
|
|
|
|
| 87 |
if response.status_code == 200:
|
| 88 |
data = response.json()
|
| 89 |
-
print(f"[PASS] Swahili info: {data['name']} ({data['native_name']})")
|
| 90 |
else:
|
| 91 |
-
print(f"[FAIL] Failed to get Swahili info")
|
| 92 |
|
| 93 |
print("-" * 50)
|
| 94 |
|
|
|
|
| 1 |
"""
|
| 2 |
+
Simple API test script using requests with performance tracking
|
| 3 |
"""
|
| 4 |
|
| 5 |
import requests
|
| 6 |
import json
|
| 7 |
+
import time
|
| 8 |
|
| 9 |
# API base URL - change this to test different environments
|
| 10 |
API_URL = "https://sematech-sema-api.hf.space"
|
|
|
|
| 14 |
"""Test basic health check"""
|
| 15 |
print("[TEST] Health check...")
|
| 16 |
|
| 17 |
+
start_time = time.time()
|
| 18 |
response = requests.get(f"{API_URL}/status")
|
| 19 |
+
response_time = time.time() - start_time
|
| 20 |
+
|
| 21 |
print(f"Status: {response.status_code}")
|
| 22 |
+
print(f"Response time: {response_time:.3f}s")
|
| 23 |
|
| 24 |
if response.status_code == 200:
|
| 25 |
data = response.json()
|
|
|
|
| 41 |
"target_language": "eng_Latn"
|
| 42 |
}
|
| 43 |
|
| 44 |
+
start_time = time.time()
|
| 45 |
response = requests.post(
|
| 46 |
f"{API_URL}/translate",
|
| 47 |
headers={"Content-Type": "application/json"},
|
| 48 |
json=data
|
| 49 |
)
|
| 50 |
+
response_time = time.time() - start_time
|
| 51 |
|
| 52 |
print(f"Status: {response.status_code}")
|
| 53 |
+
print(f"Response time: {response_time:.3f}s")
|
| 54 |
|
| 55 |
if response.status_code == 200:
|
| 56 |
result = response.json()
|
|
|
|
| 58 |
print(f"Original: {data['text']}")
|
| 59 |
print(f"Translation: {result['translated_text']}")
|
| 60 |
print(f"Source language: {result['source_language']}")
|
| 61 |
+
print(f"Model inference time: {result['inference_time']:.3f}s")
|
| 62 |
+
print(f"Total request time: {response_time:.3f}s")
|
| 63 |
+
print(f"Network overhead: {(response_time - result['inference_time']):.3f}s")
|
| 64 |
else:
|
| 65 |
print(f"[FAIL] Translation failed")
|
| 66 |
print(f"Status code: {response.status_code}")
|
|
|
|
| 77 |
print("[TEST] Language endpoints...")
|
| 78 |
|
| 79 |
# Test all languages
|
| 80 |
+
start_time = time.time()
|
| 81 |
response = requests.get(f"{API_URL}/languages")
|
| 82 |
+
response_time = time.time() - start_time
|
| 83 |
if response.status_code == 200:
|
| 84 |
data = response.json()
|
| 85 |
+
print(f"[PASS] Found {data['total_count']} supported languages ({response_time:.3f}s)")
|
| 86 |
else:
|
| 87 |
+
print(f"[FAIL] Failed to get languages ({response_time:.3f}s)")
|
| 88 |
|
| 89 |
# Test popular languages
|
| 90 |
+
start_time = time.time()
|
| 91 |
response = requests.get(f"{API_URL}/languages/popular")
|
| 92 |
+
response_time = time.time() - start_time
|
| 93 |
if response.status_code == 200:
|
| 94 |
data = response.json()
|
| 95 |
+
print(f"[PASS] Found {data['total_count']} popular languages ({response_time:.3f}s)")
|
| 96 |
else:
|
| 97 |
+
print(f"[FAIL] Failed to get popular languages ({response_time:.3f}s)")
|
| 98 |
|
| 99 |
# Test specific language
|
| 100 |
+
start_time = time.time()
|
| 101 |
response = requests.get(f"{API_URL}/languages/swh_Latn")
|
| 102 |
+
response_time = time.time() - start_time
|
| 103 |
if response.status_code == 200:
|
| 104 |
data = response.json()
|
| 105 |
+
print(f"[PASS] Swahili info: {data['name']} ({data['native_name']}) ({response_time:.3f}s)")
|
| 106 |
else:
|
| 107 |
+
print(f"[FAIL] Failed to get Swahili info ({response_time:.3f}s)")
|
| 108 |
|
| 109 |
print("-" * 50)
|
| 110 |
|
tests/test_performance_timing.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Performance timing test script - demonstrates server-side timing implementation
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import requests
|
| 6 |
+
import json
|
| 7 |
+
import time
|
| 8 |
+
import statistics
|
| 9 |
+
|
| 10 |
+
def _timed_post(url, payload, timeout):
    """POST *payload* as JSON to *url* and time the round trip on the client.

    Args:
        url: Full endpoint URL.
        payload: JSON-serialisable request body.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        A ``(response, elapsed_seconds, error)`` tuple. When the request
        itself fails (timeout, connection error, ...) ``response`` is None and
        ``error`` holds the raised ``requests.RequestException``; otherwise
        ``error`` is None.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    try:
        response = requests.post(
            url,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return None, time.perf_counter() - started, exc
    return response, time.perf_counter() - started, None


def test_server_side_timing(api_url="https://sematech-sema-api.hf.space"):
    """Print a client/server timing breakdown for translation and detection.

    Sends a fixed set of requests to ``{api_url}/translate`` and
    ``{api_url}/detect-language``, measures the round trip on the client and
    compares it with the ``total_time`` / ``inference_time`` fields of the
    JSON body. The ``X-Response-Time``, ``X-Response-Time-Ms`` and
    ``X-Request-ID`` response headers are echoed as well.

    A ``total_time`` or ``inference_time`` field missing from the body is
    treated as 0. Requests that fail (transport error or non-200 status) are
    reported and left out of the summary instead of aborting the run.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        None. All results are printed to stdout.
    """
    print("[INFO] Testing Server-Side Performance Timing")
    print("=" * 60)

    # Test translation endpoint timing
    print("\n[TEST] Translation Endpoint Timing")
    print("-" * 40)

    test_cases = [
        {"text": "Hello", "target_language": "swh_Latn"},
        {"text": "Habari ya asubuhi", "target_language": "eng_Latn"},
        {"text": "Good morning everyone", "target_language": "fra_Latn"},
        {"text": "Bonjour tout le monde", "target_language": "eng_Latn"},
        {"text": "How are you doing today?", "target_language": "swh_Latn"},
    ]

    translation_times = []

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n{i}. Testing: '{test_case['text']}'")

        response, client_total, error = _timed_post(
            f"{api_url}/translate", test_case, timeout=30
        )
        if error is not None:
            print(f" [FAIL] {type(error).__name__}: {error}")
            continue
        if response.status_code != 200:
            print(f" [FAIL] HTTP {response.status_code}")
            continue

        data = response.json()

        # Extract timing information reported by the server
        server_total = data.get('total_time', 0)
        inference_time = data.get('inference_time', 0)
        # Whatever the client waited beyond the server's own total
        network_overhead = client_total - server_total
        # Server time that was not spent in model inference
        processing_overhead = server_total - inference_time

        # Extract response headers
        response_time_header = response.headers.get('X-Response-Time', 'N/A')
        response_time_ms = response.headers.get('X-Response-Time-Ms', 'N/A')
        request_id = response.headers.get('X-Request-ID', 'N/A')

        print(f" Translation: '{data['translated_text']}'")
        print(f" [TIMING] Client total: {client_total:.3f}s")
        print(f" [TIMING] Server total: {server_total:.3f}s")
        print(f" [TIMING] Model inference: {inference_time:.3f}s")
        print(f" [TIMING] Processing overhead: {processing_overhead:.3f}s")
        print(f" [TIMING] Network overhead: {network_overhead:.3f}s")
        print(f" [HEADERS] X-Response-Time: {response_time_header}")
        print(f" [HEADERS] X-Response-Time-Ms: {response_time_ms}")
        print(f" [HEADERS] X-Request-ID: {request_id}")

        translation_times.append({
            'client_total': client_total,
            'server_total': server_total,
            'inference_time': inference_time,
            'processing_overhead': processing_overhead,
            'network_overhead': network_overhead,
        })

    # Test language detection timing
    print("\n[TEST] Language Detection Timing")
    print("-" * 40)

    detection_cases = [
        "Hello world",
        "Habari ya dunia",
        "Bonjour le monde",
        "Hola mundo",
        "Good morning everyone, how are you doing today?",
    ]

    detection_times = []

    for i, text in enumerate(detection_cases, 1):
        print(f"\n{i}. Detecting: '{text}'")

        response, client_total, error = _timed_post(
            f"{api_url}/detect-language", {"text": text}, timeout=10
        )
        if error is not None:
            print(f" [FAIL] {type(error).__name__}: {error}")
            continue
        if response.status_code != 200:
            print(f" [FAIL] HTTP {response.status_code}")
            continue

        data = response.json()

        server_total = data.get('total_time', 0)
        network_overhead = client_total - server_total

        response_time_header = response.headers.get('X-Response-Time', 'N/A')

        print(f" Detected: {data['detected_language']} ({data['language_name']})")
        print(f" Confidence: {data['confidence']:.3f}")
        print(f" [TIMING] Client total: {client_total:.3f}s")
        print(f" [TIMING] Server total: {server_total:.3f}s")
        print(f" [TIMING] Network overhead: {network_overhead:.3f}s")
        print(f" [HEADERS] X-Response-Time: {response_time_header}")

        detection_times.append({
            'client_total': client_total,
            'server_total': server_total,
            'network_overhead': network_overhead,
        })

    # Performance summary
    print("\n[SUMMARY] Performance Analysis")
    print("=" * 60)

    if translation_times:
        print("\nTranslation Performance:")
        # Every record carries the same keys, so the first one names the columns.
        averages = {
            key: statistics.mean(t[key] for t in translation_times)
            for key in translation_times[0]
        }
        avg_server = averages['server_total']
        avg_inference = averages['inference_time']

        print(f" Average client total: {averages['client_total']:.3f}s")
        print(f" Average server total: {avg_server:.3f}s")
        print(f" Average inference: {avg_inference:.3f}s")
        print(f" Average processing overhead: {averages['processing_overhead']:.3f}s")
        print(f" Average network overhead: {averages['network_overhead']:.3f}s")
        # server_total defaults to 0 when the body has no total_time field;
        # guard the ratio so the summary cannot die with ZeroDivisionError.
        if avg_server > 0:
            print(f" Efficiency: {(avg_inference/avg_server)*100:.1f}% (inference/server)")
        else:
            print(" Efficiency: n/a (average server total is zero)")

    if detection_times:
        print("\nLanguage Detection Performance:")
        avg_client = statistics.mean(t['client_total'] for t in detection_times)
        avg_server = statistics.mean(t['server_total'] for t in detection_times)
        avg_network = statistics.mean(t['network_overhead'] for t in detection_times)

        print(f" Average client total: {avg_client:.3f}s")
        print(f" Average server total: {avg_server:.3f}s")
        print(f" Average network overhead: {avg_network:.3f}s")

    print("\n[INFO] Server-side timing provides:")
    print(" - Accurate server processing time")
    print(" - Network overhead calculation")
    print(" - Performance bottleneck identification")
    print(" - Response headers for monitoring")
    print(" - Request tracking with unique IDs")
|
| 167 |
+
|
| 168 |
+
def test_concurrent_performance(api_url="https://sematech-sema-api.hf.space", num_requests=5):
    """Fire several translation requests at once and print aggregate timings.

    Starts ``num_requests`` threads, each POSTing one request to
    ``{api_url}/translate``. Each worker records its client-side round-trip
    time together with the ``total_time`` and ``inference_time`` fields of
    the JSON body (a missing field counts as 0). Non-200 responses and
    exceptions raised inside a worker are recorded as failures, and the
    reason for each failure is printed.

    Args:
        api_url: Base URL of the API under test.
        num_requests: Number of requests to send concurrently.

    Returns:
        None. All results are printed to stdout.
    """
    print(f"\n[TEST] Concurrent Performance ({num_requests} requests)")
    print("-" * 50)

    import threading
    import queue

    # Workers hand their outcome back through a thread-safe queue.
    results = queue.Queue()

    def make_request(request_id):
        """Send one translation request and enqueue a success/failure record."""
        # perf_counter is monotonic, unlike time.time(), so the interval
        # is immune to system clock adjustments.
        started = time.perf_counter()
        try:
            response = requests.post(
                f"{api_url}/translate",
                headers={"Content-Type": "application/json"},
                json={"text": f"Hello world {request_id}", "target_language": "swh_Latn"},
                timeout=30,
            )

            client_time = time.perf_counter() - started

            if response.status_code == 200:
                data = response.json()
                results.put({
                    'request_id': request_id,
                    'success': True,
                    'client_time': client_time,
                    'server_time': data.get('total_time', 0),
                    'inference_time': data.get('inference_time', 0),
                    'translation': data['translated_text'],
                })
            else:
                results.put({
                    'request_id': request_id,
                    'success': False,
                    'error': response.status_code,
                })

        except Exception as e:
            # Thread boundary: record any failure instead of letting the
            # worker die silently, so the summary still accounts for it.
            results.put({
                'request_id': request_id,
                'success': False,
                'error': str(e),
            })

    # Start concurrent requests
    threads = []
    start_time = time.perf_counter()

    for i in range(num_requests):
        thread = threading.Thread(target=make_request, args=(i + 1,))
        threads.append(thread)
        thread.start()

    # Wait for all requests to complete
    for thread in threads:
        thread.join()

    total_time = time.perf_counter() - start_time

    # Collect results; every worker has finished, so the queue is final.
    successful_requests = []
    failed_requests = []

    while not results.empty():
        result = results.get()
        if result['success']:
            successful_requests.append(result)
        else:
            failed_requests.append(result)

    print(f" Total time for {num_requests} concurrent requests: {total_time:.3f}s")
    print(f" Successful requests: {len(successful_requests)}")
    print(f" Failed requests: {len(failed_requests)}")

    # Surface why requests failed; previously the reasons were collected but
    # never shown. Sorted by id because queue order is completion order.
    for failure in sorted(failed_requests, key=lambda r: r['request_id']):
        print(f" [FAIL] Request {failure['request_id']}: {failure['error']}")

    if successful_requests:
        avg_client = statistics.mean(r['client_time'] for r in successful_requests)
        avg_server = statistics.mean(r['server_time'] for r in successful_requests)
        avg_inference = statistics.mean(r['inference_time'] for r in successful_requests)

        print(f" Average client time: {avg_client:.3f}s")
        print(f" Average server time: {avg_server:.3f}s")
        print(f" Average inference time: {avg_inference:.3f}s")
        print(f" Requests per second: {len(successful_requests)/total_time:.2f}")
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default base URL.
    cli_args = sys.argv[1:]
    api_url = cli_args[0] if cli_args else "https://sematech-sema-api.hf.space"

    print(f"[INFO] Testing performance timing at: {api_url}")

    # Run the sequential timing breakdown first, then the concurrent test.
    for run_check in (test_server_side_timing, test_concurrent_performance):
        run_check(api_url)

    print("\n[SUCCESS] Performance timing tests completed!")
|