# Web file-viewer header (not part of the module): Spaces: Running; File size: 14,369 Bytes; 207ab7d
# google_free_translate.py
"""
Free Google Translate API implementation using the web endpoint.
No API key or billing required!
"""
import logging
import threading
import time
import json
import requests
from typing import Optional, Dict, Any
import urllib.parse
import random
class GoogleFreeTranslateNew:
    """Translate text through Google's public (key-less) translate endpoints.

    The translator tries a fixed list of endpoints and, for the
    ``translate_a/single`` style endpoints, a fixed list of ``client``
    identifiers until one of them yields a non-empty translation.
    Successful results are kept in a bounded in-memory cache, and calls are
    spaced at least ``rate_limit`` seconds apart.

    No API key or Google Cloud credentials are used.
    """

    name = 'Google(Free)New'
    free = True
    endpoint: str = 'https://translate.googleapis.com/translate_a/single'

    # Endpoints tried in order by translate().
    _ENDPOINTS = (
        'https://translate.googleapis.com/translate_a/single',  # Most reliable public endpoint
        'https://clients5.google.com/translate_a/t',  # Mobile client endpoint
        'https://translate.google.com/translate_a/single',  # Direct web endpoint
        'https://clients5.google.com/translate_a/single',  # Alternative client5
    )
    # URLs containing this marker are queried with the mobile request format.
    _MOBILE_ENDPOINT_MARKER = 'clients5.google.com/translate_a/t'
    # ``client`` values tried in order against translate_a/single endpoints.
    _CLIENT_TYPES = ('gtx', 'webapp', 't', 'dict-chrome-ex', 'android')
    # Short language codes that must be expanded before being sent.
    _LANGUAGE_ALIASES = {'zh': 'zh-CN'}
    # Once the cache exceeds the maximum, the oldest entries are dropped.
    _CACHE_MAX_ENTRIES = 1000
    _CACHE_EVICT_COUNT = 100
    # Per-request timeout in seconds.
    _REQUEST_TIMEOUT = 10

    def __init__(self, source_language: str = "auto", target_language: str = "en", logger=None):
        """Create a translator for one language pair.

        Args:
            source_language: Source language code, or "auto".
            target_language: Target language code.
            logger: Optional logger; defaults to this module's logger.
        """
        self.source_language = source_language
        self.target_language = target_language
        self.logger = logger or logging.getLogger(__name__)
        self.cache = {}  # Simple in-memory cache (insertion-ordered dict)
        self.cache_lock = threading.Lock()
        self.request_lock = threading.Lock()
        self.last_request_time = 0.0  # time.monotonic() value of the last request slot
        self.rate_limit = 0.5  # 500ms between requests to avoid rate limiting
        # Multiple user agents to rotate through (helps avoid 403)
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
        ]

    def _get_source_code(self):
        """Return the source language code to send ("zh" becomes "zh-CN")."""
        return self._LANGUAGE_ALIASES.get(self.source_language, self.source_language)

    def _get_target_code(self):
        """Return the target language code to send ("zh" becomes "zh-CN")."""
        return self._LANGUAGE_ALIASES.get(self.target_language, self.target_language)

    def get_headers(self):
        """Return browser-like request headers with a randomly chosen user agent."""
        return {
            'Accept': '*/*',
            # 'br' is deliberately not advertised: requests can only decode
            # Brotli bodies when an optional brotli package is installed.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': random.choice(self.user_agents),  # Random user agent to avoid detection
            'Referer': 'https://translate.google.com/',
            'Origin': 'https://translate.google.com',
        }

    def _get_cache_key(self, text: str) -> str:
        """Return the cache key for *text* under the current language pair.

        The text itself is embedded (rather than ``hash(text)``) so that two
        different texts can never share a key.
        """
        return f"{self.source_language}_{self.target_language}_{text}"

    def _rate_limit_delay(self):
        """Sleep as needed so request slots are at least ``rate_limit`` seconds apart.

        The lock is held while sleeping so concurrent callers are serialized.
        A monotonic clock is used so system clock adjustments cannot shorten
        or stretch the delay.
        """
        with self.request_lock:
            elapsed = time.monotonic() - self.last_request_time
            if elapsed < self.rate_limit:
                time.sleep(self.rate_limit - elapsed)
            self.last_request_time = time.monotonic()

    def translate(self, text: str) -> Dict[str, Any]:
        """Translate *text* using Google's free web API.

        Args:
            text: Text to translate.

        Returns:
            Dict with 'translatedText' and 'detectedSourceLanguage' keys.
            Blank input yields an empty translation. If every endpoint fails,
            the original text is returned as 'translatedText' together with
            an 'error' key describing the failure.
        """
        if not text or not text.strip():
            return {
                'translatedText': '',
                'detectedSourceLanguage': self.source_language
            }

        # Check cache first
        cache_key = self._get_cache_key(text)
        with self.cache_lock:
            cached = self.cache.get(cache_key)
        if cached is not None:
            self.logger.debug("Cache hit for translation: %s...", text[:50])
            # Hand out a copy so callers cannot mutate the cached entry.
            return dict(cached)

        # Rate limiting
        self._rate_limit_delay()

        try:
            result = self._translate_uncached(text)
        except Exception as e:  # best-effort API: never propagate to the caller
            self.logger.error("Translation failed: %s", e)
            # Return original text as fallback
            return {
                'translatedText': text,
                'detectedSourceLanguage': self.source_language,
                'error': str(e)
            }

        self._store_in_cache(cache_key, result)
        return result

    def _translate_uncached(self, text: str) -> Dict[str, Any]:
        """Try every endpoint in order and return the first successful result.

        Raises:
            RuntimeError: If no endpoint produced a translation.
        """
        source_lang = self._get_source_code()
        target_lang = self._get_target_code()
        for endpoint_url in self._ENDPOINTS:
            try:
                if self._MOBILE_ENDPOINT_MARKER in endpoint_url:
                    # Use mobile client API format
                    result = self._translate_via_mobile_api(text, source_lang, target_lang, endpoint_url)
                else:
                    # Use the public translate_a/single API format
                    result = self._translate_via_single_api(text, source_lang, target_lang, endpoint_url)
            except Exception as e:  # one bad endpoint must not stop the others
                self.logger.warning("Failed with endpoint %s: %s", endpoint_url, e)
                continue
            if result:
                return result
        raise RuntimeError("All Google Translate endpoints failed")

    def _store_in_cache(self, cache_key: str, result: Dict[str, Any]) -> None:
        """Cache a copy of *result* and evict the oldest entries when over the limit."""
        with self.cache_lock:
            self.cache[cache_key] = dict(result)
            if len(self.cache) > self._CACHE_MAX_ENTRIES:
                # dicts preserve insertion order, so the first keys are the oldest
                for stale_key in list(self.cache)[:self._CACHE_EVICT_COUNT]:
                    del self.cache[stale_key]

    def _translate_via_single_api(self, text: str, source_lang: str, target_lang: str, endpoint_url: str) -> Optional[Dict[str, Any]]:
        """Translate via a translate_a/single endpoint, trying each client type.

        Returns:
            The first successful result dict, or None if every client type failed.
        """
        for client_type in self._CLIENT_TYPES:
            params = {
                'client': client_type,
                'sl': source_lang,
                'tl': target_lang,
                'dt': 't',
                'q': text
            }
            # Add additional parameters for certain clients
            if client_type == 'webapp':
                params['dj'] = '1'  # Return JSON format
                params['dt'] = ['t', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 'at']
            elif client_type == 'dict-chrome-ex':
                params['dt'] = ['t', 'bd']
            try:
                result = self._try_single_api_request(params, endpoint_url, client_type)
            except Exception as e:  # try the next client type
                self.logger.debug("Client type %s failed: %s", client_type, e)
                continue
            if result:
                return result
        return None

    def _try_single_api_request(self, params: dict, endpoint_url: str, client_type: str) -> Optional[Dict[str, Any]]:
        """Send one GET request and parse the response.

        Returns:
            A result dict, or None for a non-200 status, an unparseable body,
            or a body that contains no translated text.
        """
        response = requests.get(
            endpoint_url,
            params=params,
            headers=self.get_headers(),
            timeout=self._REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            self.logger.debug(
                "Client type %s got HTTP %s from %s",
                client_type, response.status_code, endpoint_url
            )
            return None
        try:
            # ValueError covers json.JSONDecodeError and the decode errors
            # raised by other JSON backends / older requests versions.
            payload = response.json()
            return self._parse_single_payload(payload, params.get('sl', 'auto'))
        except (ValueError, KeyError, IndexError, TypeError) as e:
            self.logger.warning(
                "Failed to parse single API response with client %s: %s", client_type, e
            )
            return None

    @staticmethod
    def _parse_single_payload(payload: Any, fallback_lang: str) -> Optional[Dict[str, Any]]:
        """Extract the translation from a decoded translate_a/single payload.

        Two shapes are understood: a dict with a 'sentences' list (returned
        when ``dj=1`` is sent) and a nested list whose first element holds the
        translated segments.

        Args:
            payload: Decoded JSON body.
            fallback_lang: Language reported when the payload names none.

        Returns:
            A result dict, or None if no translated text could be extracted.
        """
        if isinstance(payload, dict):
            sentences = payload.get('sentences')
            if not isinstance(sentences, list):
                return None
            parts = [
                sentence['trans']
                for sentence in sentences
                if isinstance(sentence, dict) and isinstance(sentence.get('trans'), str)
            ]
            detected_lang = payload.get('src', fallback_lang)
        elif isinstance(payload, list) and payload:
            segments = payload[0]
            if not isinstance(segments, list):
                return None
            # Rows whose first element is not a string (e.g. None) carry no
            # translated text and are skipped instead of breaking the join.
            parts = [
                segment[0]
                for segment in segments
                if isinstance(segment, list) and segment and isinstance(segment[0], str)
            ]
            detected_lang = fallback_lang
            if len(payload) > 2 and isinstance(payload[2], str):
                detected_lang = payload[2]
        else:
            return None

        translated_text = ''.join(parts)
        if not translated_text:
            # An empty translation of non-blank input is treated as a failure
            # so the caller moves on to the next client type or endpoint.
            return None
        return {
            'translatedText': translated_text,
            'detectedSourceLanguage': detected_lang
        }

    def _translate_via_mobile_api(self, text: str, source_lang: str, target_lang: str, endpoint_url: str) -> Optional[Dict[str, Any]]:
        """Translate using the mobile client endpoint (clients5.google.com/translate_a/t).

        Returns:
            A result dict, or None for a non-200 status, an unparseable body,
            or a body that contains no translated text.
        """
        params = {
            'client': 't',  # Mobile client
            'sl': source_lang,
            'tl': target_lang,
            'q': text
        }
        # Use different headers for mobile endpoint
        mobile_headers = {
            'Accept': '*/*',
            # 'br' is deliberately not advertised (see get_headers).
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'GoogleTranslate/6.29.59279 (Linux; U; Android 11; Pixel 5)',
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        response = requests.post(
            endpoint_url,
            data=params,
            headers=mobile_headers,
            timeout=self._REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            self.logger.debug(
                "Mobile API got HTTP %s from %s", response.status_code, endpoint_url
            )
            return None
        try:
            payload = response.json()
            return self._parse_mobile_payload(payload, source_lang)
        except (ValueError, KeyError, IndexError, TypeError) as e:
            self.logger.warning("Failed to parse mobile API response: %s", e)
            return None

    @staticmethod
    def _parse_mobile_payload(payload: Any, source_lang: str) -> Optional[Dict[str, Any]]:
        """Extract the translation from a decoded mobile-endpoint payload.

        The payload is expected to be a list whose first element is either
        the translated string or a list starting with the translated string.

        Returns:
            A result dict reporting *source_lang* as the detected language,
            or None if no translated text could be extracted.
        """
        if not isinstance(payload, list) or not payload:
            return None
        first = payload[0]
        if isinstance(first, list) and first:
            first = first[0]
        if not isinstance(first, str) or not first:
            return None
        return {
            'translatedText': first,
            'detectedSourceLanguage': source_lang
        }
# Convenience function for easy usage
def translate_text(text: str, source_lang: str = "auto", target_lang: str = "en") -> str:
    """
    Translate a string in one call via Google's free API.

    A fresh GoogleFreeTranslateNew is built for every call.

    Args:
        text: Text to translate
        source_lang: Source language code (default: auto-detect)
        target_lang: Target language code (default: en)

    Returns:
        The 'translatedText' entry of the translator's result, or the
        input text unchanged when that entry is absent
    """
    outcome = GoogleFreeTranslateNew(source_lang, target_lang).translate(text)
    return outcome.get('translatedText', text)
if __name__ == "__main__":
    # Manual smoke test: translate a few sample phrases to English and
    # print each result with the language reported for it.
    demo_translator = GoogleFreeTranslateNew("auto", "en")
    samples = (
        "こんにちは、世界!",  # Japanese
        "안녕하세요 세계!",  # Korean
        "你好世界!",  # Chinese
        "Hola mundo",  # Spanish
    )
    separator = "-" * 50
    for sample in samples:
        outcome = demo_translator.translate(sample)
        print(f"Original: {sample}")
        print(f"Translated: {outcome['translatedText']}")
        print(f"Detected language: {outcome['detectedSourceLanguage']}")
        print(separator)