feat(update): added a language detection endpoint that is well suited to chatbot implementations
Browse files- app/api/v1/endpoints.py +145 -1
- app/models/schemas.py +118 -0
- app/services/translation.py +33 -0
- curl_commands.md → tests/curl_commands.md +39 -1
- tests/simple_test.py +32 -0
app/api/v1/endpoints.py
CHANGED
|
@@ -15,11 +15,14 @@ from ...models.schemas import (
|
|
| 15 |
HealthResponse,
|
| 16 |
LanguagesResponse,
|
| 17 |
LanguageStatsResponse,
|
| 18 |
-
LanguageInfo
|
|
|
|
|
|
|
| 19 |
)
|
| 20 |
from ...services.translation import (
|
| 21 |
translate_with_detection,
|
| 22 |
translate_with_source,
|
|
|
|
| 23 |
models_loaded
|
| 24 |
)
|
| 25 |
from ...services.languages import (
|
|
@@ -324,6 +327,147 @@ async def translate_endpoint(
|
|
| 324 |
)
|
| 325 |
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
@router.get(
|
| 328 |
"/languages",
|
| 329 |
response_model=LanguagesResponse,
|
|
|
|
| 15 |
HealthResponse,
|
| 16 |
LanguagesResponse,
|
| 17 |
LanguageStatsResponse,
|
| 18 |
+
LanguageInfo,
|
| 19 |
+
LanguageDetectionRequest,
|
| 20 |
+
LanguageDetectionResponse
|
| 21 |
)
|
| 22 |
from ...services.translation import (
|
| 23 |
translate_with_detection,
|
| 24 |
translate_with_source,
|
| 25 |
+
detect_language,
|
| 26 |
models_loaded
|
| 27 |
)
|
| 28 |
from ...services.languages import (
|
|
|
|
| 327 |
)
|
| 328 |
|
| 329 |
|
| 330 |
+
@router.post(
    "/detect-language",
    response_model=LanguageDetectionResponse,
    tags=["Language Detection"],
    summary="Detect Input Language",
    description="Detect the language of input text for multilingual applications.",
    responses={
        200: {"description": "Language detected successfully"},
        400: {"description": "Invalid request - empty text or text too long"},
        # The handler itself rejects over-long text with 413, so document it.
        413: {"description": "Text too long - maximum 1000 characters"},
        429: {"description": "Rate limit exceeded"},
        500: {"description": "Language detection service error"}
    }
)
@limiter.limit(f"{settings.max_requests_per_minute}/minute")
async def detect_language_endpoint(
    detection_request: LanguageDetectionRequest,
    request: Request
):
    """
    ## 🔍 Detect Input Language

    Detect the language of input text - perfect for multilingual chatbots and applications.

    ### 🎯 Use Cases
    - **Multilingual Chatbots**: Detect user language before processing
    - **Content Routing**: Route content based on detected language
    - **Auto-Translation**: Decide whether translation is needed
    - **Language Analytics**: Track language usage patterns

    ### 🤖 Chatbot Implementation Example
    ```python
    # 1. Detect user input language
    detection = await detect_language(user_input)

    # 2. Decide processing flow
    if detection.is_english:
        # Process directly in English
        response = await llm_chat(user_input)
    else:
        # Translate to English, process, translate back
        english_input = await translate(user_input, "eng_Latn")
        english_response = await llm_chat(english_input)
        response = await translate(english_response, detection.detected_language)
    ```

    ### ✨ Features
    - **High Accuracy**: FastText-based language detection
    - **200+ Languages**: Supports all FLORES-200 languages
    - **Confidence Scores**: Get detection confidence (0.0-1.0)
    - **English Flag**: Quick check if input is English
    - **Fast Processing**: ~0.01-0.05 seconds detection time

    ### 📊 Response Information
    - **Language Code**: FLORES-200 format (e.g., swh_Latn)
    - **Language Names**: Both English and native names
    - **Confidence Score**: Detection accuracy (higher = more confident)
    - **English Flag**: Boolean for quick English detection
    - **Character Count**: Input text length for analytics

    ### 🔒 Limits
    - **Rate Limit**: `settings.max_requests_per_minute` requests per minute
    - **Text Length**: Maximum 1000 characters
    - **Minimum Length**: At least 1 character required

    ### ⚠️ Errors
    - **413**: Text is longer than 1000 characters
    - **500**: Detection failed; the error is logged and counted in `ERROR_COUNT`
    """
    request_id = request.state.request_id
    character_count = len(detection_request.text)

    # Validate text length. The request schema also declares max_length=1000,
    # so this is a second line of defence for the same limit.
    if character_count > 1000:
        raise HTTPException(
            status_code=413,
            detail="Text too long. Maximum 1000 characters allowed for language detection."
        )

    full_date, _ = get_nairobi_time()

    # Log detection request
    logger.info(
        "language_detection_started",
        request_id=request_id,
        character_count=character_count
    )

    try:
        # Detect language
        detected_lang_code, confidence = detect_language(detection_request.text)

        # The detector can report a probability marginally above 1.0 (floating
        # point rounding); clamp it so the response model's 0.0-1.0 bounds do
        # not turn a successful detection into a validation error.
        confidence = min(max(float(confidence), 0.0), 1.0)

        # Get language information
        language_info = get_language_info(detected_lang_code)

        # Handle case where language is not in our database (fallback)
        if not language_info:
            language_name = detected_lang_code
            native_name = detected_lang_code
        else:
            language_name = language_info["name"]
            native_name = language_info["native_name"]

        # Check if detected language is English
        is_english = detected_lang_code in ["eng_Latn", "eng_Arab"]

        # Log successful detection
        logger.info(
            "language_detection_completed",
            request_id=request_id,
            detected_language=detected_lang_code,
            confidence=confidence,
            is_english=is_english,
            character_count=character_count
        )

        return LanguageDetectionResponse(
            detected_language=detected_lang_code,
            language_name=language_name,
            native_name=native_name,
            confidence=confidence,
            is_english=is_english,
            character_count=character_count,
            timestamp=full_date,
            request_id=request_id
        )

    except Exception as e:
        # Log detection error
        logger.error(
            "language_detection_failed",
            request_id=request_id,
            error=str(e),
            error_type=type(e).__name__,
            character_count=character_count
        )

        # Update error metrics
        ERROR_COUNT.labels(error_type="language_detection_error").inc()

        # Chain the original exception so the root cause stays in tracebacks.
        raise HTTPException(
            status_code=500,
            detail="Language detection service temporarily unavailable. Please try again later."
        ) from e
|
| 469 |
+
|
| 470 |
+
|
| 471 |
@router.get(
|
| 472 |
"/languages",
|
| 473 |
response_model=LanguagesResponse,
|
app/models/schemas.py
CHANGED
|
@@ -151,6 +151,124 @@ class HealthResponse(BaseModel):
|
|
| 151 |
timestamp: str = Field(..., description="Current timestamp")
|
| 152 |
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
class ErrorResponse(BaseModel):
|
| 155 |
"""Response model for error responses"""
|
| 156 |
|
|
|
|
| 151 |
timestamp: str = Field(..., description="Current timestamp")
|
| 152 |
|
| 153 |
|
| 154 |
+
class LanguageDetectionRequest(BaseModel):
    """
    Language detection request model

    For detecting the language of input text. The only field is ``text``,
    which must be between 1 and 1000 characters long.
    """

    text: str = Field(
        ...,
        example="Habari ya asubuhi",
        description="Text to detect language for (1-1000 characters)",
        min_length=1,
        max_length=1000,
        title="Input Text"
    )

    class Config:
        # JSON Schema "examples" must be a list of example *payloads*. The
        # summary/description/value wrapper is an OpenAPI-only shape and would
        # be rendered verbatim as the sample request body.
        json_schema_extra = {
            "examples": [
                # Swahili greeting
                {"text": "Habari ya asubuhi"},
                # English text
                {"text": "Good morning, how are you?"},
                # French text
                {"text": "Bonjour, comment allez-vous?"}
            ]
        }
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
class LanguageDetectionResponse(BaseModel):
    """
    Language detection response model

    Carries the detected language code with its display names, the detection
    confidence, an English flag, and request bookkeeping (character count,
    timestamp and request id).
    """

    # --- detected language -------------------------------------------------
    detected_language: str = Field(
        ...,
        title="Detected Language Code",
        description="Detected language code in FLORES-200 format",
        example="swh_Latn"
    )
    language_name: str = Field(
        ...,
        title="Language Name",
        description="Human-readable name of detected language",
        example="Swahili"
    )
    native_name: str = Field(
        ...,
        title="Native Language Name",
        description="Native name of detected language",
        example="Kiswahili"
    )

    # --- detection quality -------------------------------------------------
    confidence: float = Field(
        ...,
        title="Confidence Score",
        description="Detection confidence score (0.0 to 1.0)",
        ge=0.0,
        le=1.0,
        example=0.9876
    )
    is_english: bool = Field(
        ...,
        title="Is English",
        description="Whether the detected language is English",
        example=False
    )

    # --- request bookkeeping -----------------------------------------------
    character_count: int = Field(
        ...,
        title="Character Count",
        description="Number of characters in input text",
        ge=1,
        example=17
    )
    timestamp: str = Field(
        ...,
        title="Timestamp",
        description="Detection timestamp in Nairobi timezone",
        example="Monday | 2024-06-21 | 14:30:25"
    )
    request_id: str = Field(
        ...,
        title="Request ID",
        description="Unique request identifier for debugging",
        example="550e8400-e29b-41d4-a716-446655440000"
    )

    class Config:
        # Whole-object sample shown in the generated API documentation.
        json_schema_extra = {
            "example": {
                "detected_language": "swh_Latn",
                "language_name": "Swahili",
                "native_name": "Kiswahili",
                "confidence": 0.9876,
                "is_english": False,
                "character_count": 17,
                "timestamp": "Monday | 2024-06-21 | 14:30:25",
                "request_id": "550e8400-e29b-41d4-a716-446655440000"
            }
        }
|
| 270 |
+
|
| 271 |
+
|
| 272 |
class ErrorResponse(BaseModel):
|
| 273 |
"""Response model for error responses"""
|
| 274 |
|
app/services/translation.py
CHANGED
|
@@ -220,6 +220,39 @@ def translate_with_source(text: str, source_lang: str, target_lang: str) -> Tupl
|
|
| 220 |
raise e
|
| 221 |
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
def models_loaded() -> bool:
    """Return True only when lang_model, sp_model and translator are all truthy."""
    required_models = [lang_model, sp_model, translator]
    return all(required_models)
|
|
|
|
| 220 |
raise e
|
| 221 |
|
| 222 |
|
| 223 |
+
def detect_language(text: str) -> Tuple[str, float]:
    """
    Detect the language of input text

    Args:
        text: Raw input text. Newlines are replaced with spaces and the
            result is stripped before it is passed to the language model.

    Returns:
        Tuple of (language_code, confidence_score). The language code is the
        top prediction with its ``__label__`` prefix removed; the confidence
        is clamped to the range 0.0-1.0.

    Raises:
        ValueError: If the model returns no prediction for the text.
        Exception: Any other error from the model is logged and re-raised
            unchanged for the caller to handle.
    """
    try:
        # Clean text for better detection
        cleaned_text = text.replace('\n', ' ').strip()

        # Get predictions with confidence scores
        predictions = lang_model.predict(cleaned_text, k=1)
        labels, scores = predictions[0], predictions[1]

        # Fail with a clear message rather than an IndexError when the model
        # has nothing to say about the input.
        if len(labels) == 0 or len(scores) == 0:
            raise ValueError("Language model returned no prediction")

        # Extract language code and confidence
        language_code = labels[0].replace('__label__', '')

        # The reported probability can land marginally above 1.0 because of
        # floating point rounding; clamp it so callers can rely on 0.0-1.0.
        confidence = min(max(float(scores[0]), 0.0), 1.0)

        logger.info(
            "language_detected",
            text_length=len(text),
            detected_language=language_code,
            confidence=confidence
        )

        return language_code, confidence

    except Exception as e:
        logger.error("language_detection_failed", error=str(e), error_type=type(e).__name__)
        # Re-raise the exception to be handled by the endpoint; a bare raise
        # keeps the original traceback intact.
        raise
|
| 254 |
+
|
| 255 |
+
|
| 256 |
def models_loaded() -> bool:
    """Return True only when lang_model, sp_model and translator are all truthy."""
    required_models = [lang_model, sp_model, translator]
    return all(required_models)
|
curl_commands.md → tests/curl_commands.md
RENAMED
|
@@ -60,7 +60,45 @@ curl -X POST "$API_URL/api/v1/translate" \
|
|
| 60 |
}'
|
| 61 |
```
|
| 62 |
|
| 63 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
### Get All Supported Languages
|
| 66 |
```bash
|
|
|
|
| 60 |
}'
|
| 61 |
```
|
| 62 |
|
| 63 |
+
## 🔍 Language Detection Endpoints
|
| 64 |
+
|
| 65 |
+
### Detect Language of Text
|
| 66 |
+
```bash
|
| 67 |
+
curl -X POST "$API_URL/detect-language" \
|
| 68 |
+
-H "Content-Type: application/json" \
|
| 69 |
+
-d '{
|
| 70 |
+
"text": "Habari ya asubuhi"
|
| 71 |
+
}'
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
### Detect Language (English Text)
|
| 75 |
+
```bash
|
| 76 |
+
curl -X POST "$API_URL/detect-language" \
|
| 77 |
+
-H "Content-Type: application/json" \
|
| 78 |
+
-d '{
|
| 79 |
+
"text": "Good morning, how are you today?"
|
| 80 |
+
}'
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
### Detect Language (French Text)
|
| 84 |
+
```bash
|
| 85 |
+
curl -X POST "$API_URL/detect-language" \
|
| 86 |
+
-H "Content-Type: application/json" \
|
| 87 |
+
-d '{
|
| 88 |
+
"text": "Bonjour, comment allez-vous?"
|
| 89 |
+
}'
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
### Detect Language (Versioned Endpoint)
|
| 93 |
+
```bash
|
| 94 |
+
curl -X POST "$API_URL/api/v1/detect-language" \
|
| 95 |
+
-H "Content-Type: application/json" \
|
| 96 |
+
-d '{
|
| 97 |
+
"text": "Hola, ¿cómo estás?"
|
| 98 |
+
}'
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
## 🗣️ Language Information Endpoints
|
| 102 |
|
| 103 |
### Get All Supported Languages
|
| 104 |
```bash
|
tests/simple_test.py
CHANGED
|
@@ -108,6 +108,37 @@ def test_search():
|
|
| 108 |
|
| 109 |
print("-" * 50)
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
def run_all_tests():
|
| 112 |
"""Run all tests"""
|
| 113 |
print(f"🧪 Testing API at: {API_URL}")
|
|
@@ -117,6 +148,7 @@ def run_all_tests():
|
|
| 117 |
test_translation()
|
| 118 |
test_languages()
|
| 119 |
test_search()
|
|
|
|
| 120 |
|
| 121 |
print("🎉 All tests completed!")
|
| 122 |
|
|
|
|
| 108 |
|
| 109 |
print("-" * 50)
|
| 110 |
|
| 111 |
+
def test_language_detection():
    """
    Test language detection endpoint

    Posts each sample text to ``{API_URL}/detect-language`` and prints the
    detected language, flagging any result that differs from the expected
    language code for that sample.
    """
    print("🔍 Testing language detection...")

    test_cases = [
        {"text": "Habari ya asubuhi", "expected_lang": "swh_Latn"},
        {"text": "Good morning", "expected_lang": "eng_Latn"},
        {"text": "Bonjour", "expected_lang": "fra_Latn"},
        {"text": "Hola mundo", "expected_lang": "spa_Latn"}
    ]

    for test_case in test_cases:
        response = requests.post(
            f"{API_URL}/detect-language",
            headers={"Content-Type": "application/json"},
            json={"text": test_case["text"]}
        )

        if response.status_code == 200:
            data = response.json()
            detected = data['detected_language']
            confidence = data['confidence']
            is_english = data['is_english']
            expected = test_case["expected_lang"]

            # Compare against the expected code so a wrong detection is not
            # reported as a success.
            if detected == expected:
                print(f"✅ '{test_case['text']}' → {detected} ({data['language_name']})")
            else:
                print(
                    f"❌ '{test_case['text']}' → {detected} ({data['language_name']}), "
                    f"expected {expected}"
                )
            print(f" Confidence: {confidence:.3f}, Is English: {is_english}")
        else:
            print(f"❌ Detection failed for '{test_case['text']}' (HTTP {response.status_code})")

    print("-" * 50)
|
| 141 |
+
|
| 142 |
def run_all_tests():
|
| 143 |
"""Run all tests"""
|
| 144 |
print(f"🧪 Testing API at: {API_URL}")
|
|
|
|
| 148 |
test_translation()
|
| 149 |
test_languages()
|
| 150 |
test_search()
|
| 151 |
+
test_language_detection()
|
| 152 |
|
| 153 |
print("🎉 All tests completed!")
|
| 154 |
|