diff --git "a/unified_api_client.py" "b/unified_api_client.py" new file mode 100644--- /dev/null +++ "b/unified_api_client.py" @@ -0,0 +1,10210 @@ +# unified_api_client.py - REFACTORED with Enhanced Error Handling and Extended AI Model Support +""" +Key Design Principles: +- The client handles API communication and returns accurate status +- The client must save responses properly for duplicate detection +- The client must return accurate finish_reason for truncation detection +- The client must support cancellation for timeout handling +- Enhanced Multi-Key Mode: Rotates API keys during exponential backoff on server errors (500, 502, 503, 504) + to avoid waiting on potentially problematic keys before trying alternatives + +Supported models and their prefixes (Updated July 2025): +- OpenAI: gpt*, o1*, o3*, o4*, codex* (e.g., gpt-4, gpt-4o, gpt-4o-mini, gpt-4.5, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3, o3-mini, o3-pro, o4-mini) +- Google: gemini*, palm*, bard* (e.g., gemini-2.0-flash-exp, gemini-2.5-pro, gemini-2.5-flash) +- Anthropic: claude*, sonnet*, opus*, haiku* (e.g., claude-3.5-sonnet, claude-3.7-sonnet, claude-4-opus, claude-4-sonnet, claude-opus-4-20250514, claude-sonnet-4-20250514) +- DeepSeek: deepseek* (e.g., deepseek-chat, deepseek-vl, deepseek-r1) +- Mistral: mistral*, mixtral*, codestral* +- Cohere: command*, cohere*, aya* (e.g., aya-vision, command-r7b) +- AI21: j2*, jurassic*, jamba* +- Together AI: llama*, together*, alpaca*, vicuna*, wizardlm*, openchat* +- Perplexity: perplexity*, pplx*, sonar* +- Replicate: replicate* +- Yi (01.AI): yi* (e.g., yi-34b-chat-200k, yi-vl) +- Qwen (Alibaba): qwen* (e.g., qwen2.5-vl) +- Baichuan: baichuan* +- Zhipu AI: glm*, chatglm* +- Moonshot: moonshot*, kimi* +- Groq: groq*, llama-groq*, mixtral-groq* +- Baidu: ernie* +- Tencent: hunyuan* +- iFLYTEK: spark* +- ByteDance: doubao* +- MiniMax: minimax*, abab* +- SenseNova: sensenova*, nova* +- InternLM: intern*, internlm* +- TII: falcon* (e.g., falcon-2-11b) +- Microsoft: phi*, orca* +- Azure: azure* (for Azure OpenAI deployments) +- Aleph Alpha: luminous* +- Databricks: dolly* +- HuggingFace: starcoder* +- Salesforce: codegen* +- BigScience: bloom* +- Meta: opt*, galactica*, llama2*, llama3*, llama4*, codellama* +- xAI: grok* (e.g., grok-3, grok-vision) +- Poe: poe/* (e.g., poe/claude-4-opus, poe/gpt-4.5, poe/Assistant) +- OpenRouter: or/*, openrouter/* (e.g., or/anthropic/claude-4-opus, or/openai/gpt-4.5) +- Fireworks AI: fireworks/* (e.g., fireworks/llama-v3-70b) + +ELECTRONHUB SUPPORT: +ElectronHub is an API aggregator that provides access to multiple models. +To use ElectronHub, prefix your model name with one of these: +- eh/ (e.g., eh/yi-34b-chat-200k) +- electronhub/ (e.g., electronhub/gpt-4.5) +- electron/ (e.g., electron/claude-4-opus) + +ElectronHub allows you to access models from multiple providers using a single API key. + +POE SUPPORT: +Poe by Quora provides access to multiple AI models through their platform. +To use Poe, prefix your model name with 'poe/': +- poe/claude-4-opus +- poe/claude-4-sonnet +- poe/gpt-4.5 +- poe/gpt-4.1 +- poe/Assistant +- poe/gemini-2.5-pro + +OPENROUTER SUPPORT: +OpenRouter is a unified interface for 300+ models from various providers. 
+To use OpenRouter, prefix your model name with 'or/' or 'openrouter/':
+- or/anthropic/claude-4-opus
+- openrouter/openai/gpt-4.5
+- or/google/gemini-2.5-pro
+- or/meta-llama/llama-4-70b
+
+Environment Variables:
+- SEND_INTERVAL_SECONDS: Delay between API calls (respects GUI settings)
+- YI_API_BASE_URL: Custom endpoint for Yi models (optional)
+- ELECTRONHUB_API_URL: Custom ElectronHub endpoint (default: https://api.electronhub.ai/v1)
+- AZURE_OPENAI_ENDPOINT: Azure OpenAI endpoint (for azure* models)
+- AZURE_API_VERSION: Azure API version (default: 2024-02-01)
+- DATABRICKS_API_URL: Databricks workspace URL
+- SALESFORCE_API_URL: Salesforce API endpoint
+- OPENROUTER_REFERER: HTTP referer for OpenRouter (default: https://github.com/Shirochi-stack/Glossarion)
+- OPENROUTER_APP_NAME: App name for OpenRouter (default: Glossarion Translation)
+- POE_API_KEY: API key for Poe platform
+- GROQ_API_URL: Custom Groq endpoint (default: https://api.groq.com/openai/v1)
+- FIREWORKS_API_URL: Custom Fireworks AI endpoint (default: https://api.fireworks.ai/inference/v1)
+- DISABLE_GEMINI_SAFETY: Set to "true" to disable Gemini safety filters (respects GUI toggle)
+- XAI_API_URL: Custom xAI endpoint (default: https://api.x.ai/v1)
+- DEEPSEEK_API_URL: Custom DeepSeek endpoint (default: https://api.deepseek.com/v1)
+
+SAFETY SETTINGS:
+The client respects the GUI's "Disable Gemini API Safety Filters" toggle via the
+DISABLE_GEMINI_SAFETY environment variable. When enabled, it applies API-level safety
+settings where available:
+
+- Gemini: Sets all harm categories to BLOCK_NONE (most permissive)
+- OpenRouter: Disables safe mode via X-Safe-Mode header
+- Poe: Disables safe mode via safe_mode parameter
+- Other OpenAI-compatible providers: Sets moderation=false where supported
+
+Note: Not all providers support API-level safety toggles. OpenAI and Anthropic APIs
+do not have direct safety filter controls. The client only applies settings that are
+officially supported by each provider's API.
+
+Note: Many Chinese model providers (Yi, Qwen, Baichuan, etc.) may require
+API keys from their respective platforms. Some endpoints might need adjustment
+based on your region or deployment.
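+
+USAGE SKETCH (illustrative only):
+The snippet below is a minimal sketch of how this client is typically wired up.
+Key values and model names are placeholders; only the constructor and the
+environment variables documented above are assumed here. The actual send/translate
+entry points are defined further down in this module.
+
+    import os
+    from unified_api_client import UnifiedClient
+
+    # GUI pacing: wait 2 seconds between API calls (read by the client)
+    os.environ["SEND_INTERVAL_SECONDS"] = "2"
+
+    # Single-key usage: the model prefix ("gpt", "gemini", "eh/", "poe/", "or/", ...)
+    # selects the provider via the MODEL_PROVIDERS mapping
+    client = UnifiedClient(api_key="sk-placeholder", model="gpt-4.1-mini", output_dir="Output")
+
+    # Optional multi-key rotation, configured via environment variables
+    # (JSON list of {"api_key": ..., "model": ...} entries) before construction:
+    os.environ["USE_MULTI_API_KEYS"] = "1"
+    os.environ["MULTI_API_KEYS"] = '[{"api_key": "key-a", "model": "gpt-4.1"}, {"api_key": "key-b", "model": "gemini-2.5-flash"}]'
+    os.environ["FORCE_KEY_ROTATION"] = "1"
+    os.environ["ROTATION_FREQUENCY"] = "1"
+    rotating_client = UnifiedClient(api_key="fallback-key", model="gpt-4.1", output_dir="Output")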
+""" +import os +import json +import requests +from requests.adapters import HTTPAdapter +try: + from urllib3.util.retry import Retry +except Exception: + Retry = None +from dataclasses import dataclass +from typing import Optional, Dict, Any, Tuple, List +import logging +import re +import base64 +import contextlib +from PIL import Image +import io +import time +import random +import csv +from datetime import datetime +import traceback +import hashlib +import html +try: + from multi_api_key_manager import APIKeyPool, APIKeyEntry, RateLimitCache +except ImportError: + try: + from .multi_api_key_manager import APIKeyPool, APIKeyEntry, RateLimitCache + except ImportError: + # Fallback classes if module not available + class APIKeyPool: + def __init__(self): pass + class APIKeyEntry: + def __init__(self): pass + class RateLimitCache: + def __init__(self): pass +import threading +import uuid +from threading import RLock +from collections import defaultdict +logger = logging.getLogger(__name__) + +# IMPORTANT: This client respects GUI settings via environment variables: +# - SEND_INTERVAL_SECONDS: Delay between API calls (set by GUI) +# All API providers INCLUDING ElectronHub respect this setting for proper GUI integration + +# Note: For Yi models through ElectronHub, use eh/yi-34b-chat-200k format +# For direct Yi API access, use yi-34b-chat-200k format + +# Set up logging +logger = logging.getLogger(__name__) + +# Enable HTTP request logging for debugging +def setup_http_logging(): + """Enable detailed HTTP request/response logging for debugging""" + import logging + + # Enable httpx logging (used by OpenAI SDK) + httpx_logger = logging.getLogger("httpx") + httpx_logger.setLevel(logging.INFO) + + # Enable requests logging (fallback HTTP calls) + requests_logger = logging.getLogger("requests.packages.urllib3") + requests_logger.setLevel(logging.INFO) + + # Enable OpenAI SDK logging + openai_logger = logging.getLogger("openai") + openai_logger.setLevel(logging.DEBUG) + + # Create console handler if not exists + if not any(isinstance(h, logging.StreamHandler) for h in logging.root.handlers): + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s') + console_handler.setFormatter(formatter) + + httpx_logger.addHandler(console_handler) + requests_logger.addHandler(console_handler) + openai_logger.addHandler(console_handler) + + # Prevent duplicate logs + httpx_logger.propagate = False + requests_logger.propagate = False + openai_logger.propagate = False + +# Enable HTTP logging on module import +setup_http_logging() + +# OpenAI SDK +try: + import openai + from openai import OpenAIError +except ImportError: + openai = None + class OpenAIError(Exception): pass +try: + import httpx + from httpx import HTTPStatusError +except ImportError: + httpx = None + class HTTPStatusError(Exception): pass + +# Gemini SDK +try: + from google import genai + from google.genai import types + GENAI_AVAILABLE = True +except ImportError: + genai = None + types = None + GENAI_AVAILABLE = False + +# Anthropic SDK (optional - can use requests if not installed) +try: + import anthropic +except ImportError: + anthropic = None + +# Cohere SDK (optional) +try: + import cohere +except ImportError: + cohere = None + +# Mistral SDK (optional) +try: + from mistralai.client import MistralClient + from mistralai.models.chat_completion import ChatMessage +except ImportError: + MistralClient = None + +# Google Vertex AI API Cloud +try: + 
from google.cloud import aiplatform + from google.oauth2 import service_account + import vertexai + VERTEX_AI_AVAILABLE = True +except ImportError: + VERTEX_AI_AVAILABLE = False + print("Vertex AI SDK not installed. Install with: pip install google-cloud-aiplatform") + +try: + import deepl + DEEPL_AVAILABLE = True +except ImportError: + deepl = None + DEEPL_AVAILABLE = False + +try: + from google.cloud import translate_v2 as google_translate + GOOGLE_TRANSLATE_AVAILABLE = True +except ImportError: + google_translate = None + GOOGLE_TRANSLATE_AVAILABLE = False + +from functools import lru_cache +from datetime import datetime, timedelta + + +@dataclass +class UnifiedResponse: + """Standardized response format for all API providers""" + content: str + finish_reason: Optional[str] = None + usage: Optional[Dict[str, int]] = None + raw_response: Optional[Any] = None + error_details: Optional[Dict[str, Any]] = None + + @property + def is_truncated(self) -> bool: + """Check if the response was truncated + + IMPORTANT: This is used by retry logic to detect when to retry with more tokens + """ + return self.finish_reason in ['length', 'max_tokens', 'stop_sequence_limit', 'truncated', 'incomplete'] + + @property + def is_complete(self) -> bool: + """Check if the response completed normally""" + return self.finish_reason in ['stop', 'complete', 'end_turn', 'finished', None] + + @property + def is_error(self) -> bool: + """Check if the response is an error""" + return self.finish_reason == 'error' or bool(self.error_details) + +class UnifiedClientError(Exception): + """Generic exception for UnifiedClient errors.""" + def __init__(self, message, error_type=None, http_status=None, details=None): + super().__init__(message) + self.error_type = error_type + self.http_status = http_status + self.details = details + +class UnifiedClient: + # ----- Helper methods to reduce duplication ----- + @contextlib.contextmanager + def _model_lock(self): + """Context manager for thread-safe model access""" + if hasattr(self, '_instance_model_lock') and self._instance_model_lock is not None: + with self._instance_model_lock: + yield + else: + # Fallback - create a temporary lock if needed + if not hasattr(self, '_temp_model_lock'): + self._temp_model_lock = threading.RLock() + with self._temp_model_lock: + yield + def _get_send_interval(self) -> float: + try: + return float(os.getenv("SEND_INTERVAL_SECONDS", "2")) + except Exception: + return 2.0 + + def _debug_log(self, message: str) -> None: + """Print debug logs unless in cleanup/stop state or quiet mode. + Suppresses noisy logs when the operation is cancelled or in cleanup. + Honours QUIET_LOGS=1 environment toggle. 
+ """ + try: + if getattr(self, '_in_cleanup', False): + return + if getattr(self, '_cancelled', False): + return + # Some call sites expose a stop check + if hasattr(self, '_is_stop_requested') and callable(getattr(self, '_is_stop_requested')): + try: + if self._is_stop_requested(): + return + except Exception: + pass + if os.getenv('QUIET_LOGS', '0') == '1': + return + print(message) + except Exception: + # Best-effort logging; swallow any print failures + try: + print(message) + except Exception: + pass + + def _safe_len(self, obj, context="unknown"): + """Safely get length of an object with better error reporting""" + try: + if obj is None: + print(f"⚠️ Warning: Attempting to get length of None in context: {context}") + return 0 + return len(obj) + except TypeError as e: + print(f"❌ TypeError in _safe_len for context '{context}': {e}") + print(f"❌ Object type: {type(obj)}, Object value: {obj}") + return 0 + except Exception as e: + print(f"❌ Unexpected error in _safe_len for context '{context}': {e}") + return 0 + + def _extract_first_image_base64(self, messages) -> Optional[str]: + if messages is None: + return None + for msg in messages: + if msg is None: + continue + content = msg.get('content') + if isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get('type') == 'image_url': + url = part.get('image_url', {}).get('url', '') + if isinstance(url, str): + if url.startswith('data:') and ',' in url: + return url.split(',', 1)[1] + return url + return None + + + def _get_timeout_config(self) -> Tuple[bool, int]: + enabled = os.getenv("RETRY_TIMEOUT", "0") == "1" + window = int(os.getenv("CHUNK_TIMEOUT", "180")) + return enabled, window + def _with_attempt_suffix(self, payload_name: str, response_name: str, request_id: str, attempt: int, is_image: bool) -> Tuple[str, str]: + base_payload, ext_payload = os.path.splitext(payload_name) + base_response, ext_response = os.path.splitext(response_name) + unique_suffix = f"_{request_id}_imgA{attempt}" if is_image else f"_{request_id}_A{attempt}" + return f"{base_payload}{unique_suffix}{ext_payload}", f"{base_response}{unique_suffix}{ext_response}" + + def _maybe_retry_main_key_on_prohibited(self, messages, temperature, max_tokens, max_completion_tokens, context, request_id=None, image_data=None): + if not (self._multi_key_mode and getattr(self, 'original_api_key', None) and getattr(self, 'original_model', None)): + return None + try: + return self._retry_with_main_key( + messages, temperature, max_tokens, max_completion_tokens, context, + request_id=request_id, image_data=image_data + ) + except Exception: + return None + + def _detect_safety_filter(self, messages, extracted_content: str, finish_reason: Optional[str], response: Any, provider: str) -> bool: + # Heuristic patterns consolidated from previous branches + # 1) Suspicious finish reasons with empty content + if not extracted_content and finish_reason in ['length', 'stop', 'max_tokens', None]: + return True + # 2) Safety indicators in raw response/error details + response_str = "" + if response is not None: + if hasattr(response, 'raw_response') and response.raw_response is not None: + response_str = str(response.raw_response).lower() + elif hasattr(response, 'error_details') and response.error_details is not None: + response_str = str(response.error_details).lower() + else: + response_str = str(response).lower() + safety_indicators = [ + 'safety', 'blocked', 'prohibited', 'harmful', 'inappropriate', + 'refused', 'content_filter', 'content policy', 
'violation', + 'cannot assist', 'unable to process', 'against guidelines', + 'ethical', 'responsible ai', 'harm_category', 'nsfw', + 'adult content', 'explicit', 'violence', 'disturbing' + ] + if any(ind in response_str for ind in safety_indicators): + return True + # 3) Safety phrases in extracted content + if extracted_content: + content_lower = extracted_content.lower() + safety_phrases = [ + 'blocked', 'safety', 'cannot', 'unable', 'prohibited', + 'content filter', 'refused', 'inappropriate', 'i cannot', + "i can't", "i'm not able", "not able to", "against my", + 'content policy', 'guidelines', 'ethical', + 'analyze this image', 'process this image', 'describe this image', 'nsfw' + ] + if any(p in content_lower for p in safety_phrases): + return True + # 4) Provider-specific empty behavior + if provider in ['openai', 'azure', 'electronhub', 'openrouter', 'poe', 'gemini']: + if not extracted_content and finish_reason != 'error': + return True + # 5) Suspiciously short output vs long input - FIX: Add None checks + if extracted_content and len(extracted_content) < 50: + # FIX: Add None check for messages + if messages is not None: + input_length = 0 + for m in messages: + if m is not None and m.get('role') == 'user': + content = m.get('content', '') + # FIX: Add None check for content + if content is not None: + input_length += len(str(content)) + if input_length > 200 and any(w in extracted_content.lower() for w in ['cannot', 'unable', 'sorry', 'assist']): + return True + return False + + def _finalize_empty_response(self, messages, context, response, extracted_content: str, finish_reason: Optional[str], provider: str, request_type: str, start_time: float) -> Tuple[str, str]: + is_safety = self._detect_safety_filter(messages, extracted_content, finish_reason, response, provider) + # Always save failure snapshot and log truncation details + self._save_failed_request(messages, f"Empty {request_type} response from {getattr(self, 'client_type', 'unknown')}", context, response) + error_details = getattr(response, 'error_details', None) + if is_safety: + error_details = { + 'likely_safety_filter': True, + 'original_finish_reason': finish_reason, + 'provider': getattr(self, 'client_type', None), + 'model': self.model, + 'key_identifier': getattr(self, 'key_identifier', None), + 'request_type': request_type + } + self._log_truncation_failure( + messages=messages, + response_content=extracted_content or "", + finish_reason='content_filter' if is_safety else (finish_reason or 'error'), + context=context, + error_details=error_details + ) + # Stats + self._track_stats(context, False, f"empty_{request_type}_response", time.time() - start_time) + # Fallback message + if is_safety: + fb_reason = f"image_safety_filter_{provider}" if request_type == 'image' else f"safety_filter_{provider}" + else: + fb_reason = "empty_image" if request_type == 'image' else "empty" + fallback = self._handle_empty_result(messages, context, getattr(response, 'error_details', fb_reason) if response else fb_reason) + return fallback, ('content_filter' if is_safety else 'error') + + def _is_rate_limit_error(self, exc: Exception) -> bool: + s = str(exc).lower() + if hasattr(exc, 'error_type') and getattr(exc, 'error_type') == 'rate_limit': + return True + return ('429' in s) or ('rate limit' in s) or ('quota' in s) + + def _compute_backoff(self, attempt: int, base: float, cap: float) -> float: + delay = (base * (2 ** attempt)) + random.uniform(0, 1) + return min(delay, cap) + + def _normalize_token_params(self, max_tokens: 
Optional[int], max_completion_tokens: Optional[int]) -> Tuple[Optional[int], Optional[int]]: + if self._is_o_series_model(): + mct = max_completion_tokens if max_completion_tokens is not None else (max_tokens or getattr(self, 'default_max_tokens', 8192)) + return None, mct + else: + mt = max_tokens if max_tokens is not None else (max_completion_tokens or getattr(self, 'default_max_tokens', 8192)) + return mt, None + def _apply_api_delay(self) -> None: + if getattr(self, '_in_cleanup', False): + # Suppress log in cleanup mode + # self._debug_log("⚡ Skipping API delay (cleanup mode)") + return + try: + api_delay = float(os.getenv("SEND_INTERVAL_SECONDS", "2")) + except Exception: + api_delay = 2.0 + if api_delay > 0: + self._debug_log(f"⏳ Waiting {api_delay}s before next API call...") + time.sleep(api_delay) + + def _set_idempotency_context(self, request_id: str, attempt: int) -> None: + tls = self._get_thread_local_client() + tls.idem_request_id = request_id + tls.idem_attempt = attempt + + def _get_extraction_kwargs(self) -> dict: + ct = getattr(self, 'client_type', None) + if ct == 'gemini': + return { + 'supports_thinking': self._supports_thinking(), + 'thinking_budget': int(os.getenv("THINKING_BUDGET", "-1")), + } + return {} + + def _is_rate_limit_error(self, exc: Exception) -> bool: + s = str(exc).lower() + if hasattr(exc, 'error_type') and getattr(exc, 'error_type') == 'rate_limit': + return True + return ('429' in s) or ('rate limit' in s) or ('quota' in s) + + def _compute_backoff(self, attempt: int, base: float, cap: float) -> float: + delay = (base * (2 ** attempt)) + random.uniform(0, 1) + return min(delay, cap) + + def _normalize_token_params(self, max_tokens: Optional[int], max_completion_tokens: Optional[int]) -> Tuple[Optional[int], Optional[int]]: + if self._is_o_series_model(): + mct = max_completion_tokens if max_completion_tokens is not None else (max_tokens or getattr(self, 'default_max_tokens', 8192)) + return None, mct + else: + mt = max_tokens if max_tokens is not None else (max_completion_tokens or getattr(self, 'default_max_tokens', 8192)) + return mt, None + """ + Unified client with fixed thread-safe multi-key support + + Key improvements: + 1. Thread-local storage for API clients + 2. Proper key rotation per request + 3. Thread-safe rate limit handling + 4. Cleaner error handling and retry logic + 5. 
INSTANCE-BASED multi-key mode (not class-based) + """ + # Thread safety for file operations + _file_write_lock = RLock() + _tracker_lock = RLock() + _model_lock = RLock() + + # Class-level shared resources - properly initialized + _rate_limit_cache = None + _api_key_pool: Optional[APIKeyPool] = None + _pool_lock = threading.Lock() + + # Request tracking + _global_request_counter = 0 + _counter_lock = threading.Lock() + + # Thread-local storage for clients and key assignments + _thread_local = threading.local() + + # global stop flag + _global_cancelled = False + + # Legacy tracking (for compatibility) + _key_assignments = {} # thread_id -> (key_index, key_identifier) + _assignment_lock = threading.Lock() + + # Track displayed log messages to avoid spam + _displayed_messages = set() + _message_lock = threading.Lock() + + # Your existing MODEL_PROVIDERS and other class variables + MODEL_PROVIDERS = { + 'vertex/': 'vertex_model_garden', + '@': 'vertex_model_garden', + 'gpt': 'openai', + 'o1': 'openai', + 'o3': 'openai', + 'o4': 'openai', + 'gemini': 'gemini', + 'claude': 'anthropic', + 'chutes': 'chutes', + 'chutes/': 'chutes', + 'sonnet': 'anthropic', + 'opus': 'anthropic', + 'haiku': 'anthropic', + 'deepseek': 'deepseek', + 'mistral': 'mistral', + 'mixtral': 'mistral', + 'codestral': 'mistral', + 'command': 'cohere', + 'cohere': 'cohere', + 'aya': 'cohere', + 'j2': 'ai21', + 'jurassic': 'ai21', + 'llama': 'together', + 'together': 'together', + 'perplexity': 'perplexity', + 'pplx': 'perplexity', + 'sonar': 'perplexity', + 'replicate': 'replicate', + 'yi': 'yi', + 'qwen': 'qwen', + 'baichuan': 'baichuan', + 'glm': 'zhipu', + 'chatglm': 'zhipu', + 'moonshot': 'moonshot', + 'kimi': 'moonshot', + 'groq': 'groq', + 'llama-groq': 'groq', + 'mixtral-groq': 'groq', + 'ernie': 'baidu', + 'hunyuan': 'tencent', + 'spark': 'iflytek', + 'doubao': 'bytedance', + 'minimax': 'minimax', + 'abab': 'minimax', + 'sensenova': 'sensenova', + 'nova': 'sensenova', + 'intern': 'internlm', + 'internlm': 'internlm', + 'falcon': 'tii', + 'jamba': 'ai21', + 'phi': 'microsoft', + 'azure': 'azure', + 'palm': 'google', + 'bard': 'google', + 'codex': 'openai', + 'luminous': 'alephalpha', + 'alpaca': 'together', + 'vicuna': 'together', + 'wizardlm': 'together', + 'openchat': 'together', + 'orca': 'microsoft', + 'dolly': 'databricks', + 'starcoder': 'huggingface', + 'codegen': 'salesforce', + 'bloom': 'bigscience', + 'opt': 'meta', + 'galactica': 'meta', + 'llama2': 'meta', + 'llama3': 'meta', + 'llama4': 'meta', + 'codellama': 'meta', + 'grok': 'xai', + 'poe': 'poe', + 'or': 'openrouter', + 'openrouter': 'openrouter', + 'fireworks': 'fireworks', + 'eh/': 'electronhub', + 'electronhub/': 'electronhub', + 'electron/': 'electronhub', + 'deepl': 'deepl', + 'google-translate': 'google_translate', + } + + # Model-specific constraints + MODEL_CONSTRAINTS = { + 'temperature_fixed': ['o4-mini', 'o1-mini', 'o1-preview', 'o3-mini', 'o3', 'o3-pro', 'o4-mini', 'gpt-5-mini','gpt-5','gpt-5-nano'], + 'no_system_message': ['o1', 'o1-preview', 'o3', 'o3-pro'], + 'max_completion_tokens': ['o4', 'o1', 'o3', 'gpt-5-mini','gpt-5','gpt-5-nano'], + 'chinese_optimized': ['qwen', 'yi', 'glm', 'chatglm', 'baichuan', 'ernie', 'hunyuan'], + } + + @classmethod + def _log_once(cls, message: str, is_debug: bool = False): + """Log a message only once per session to avoid spam""" + with cls._message_lock: + if message not in cls._displayed_messages: + cls._displayed_messages.add(message) + if is_debug: + print(f"[DEBUG] {message}") + else: + 
logger.info(message) + return True + return False + + @classmethod + def setup_multi_key_pool(cls, keys_list, force_rotation=True, rotation_frequency=1): + """Setup the shared API key pool""" + with cls._pool_lock: + if cls._api_key_pool is None: + cls._api_key_pool = APIKeyPool() + + # Initialize rate limit cache if needed + if cls._rate_limit_cache is None: + cls._rate_limit_cache = RateLimitCache() + + # Validate and fix encrypted keys + validated_keys = [] + encrypted_keys_fixed = 0 + + # FIX 1: Use keys_list parameter instead of undefined 'config' + for i, key_data in enumerate(keys_list): + if not isinstance(key_data, dict): + continue + + api_key = key_data.get('api_key', '') + if not api_key: + continue + + # Fix encrypted keys + if api_key.startswith('ENC:'): + try: + from api_key_encryption import get_handler + handler = get_handler() + decrypted_key = handler.decrypt_value(api_key) + + if decrypted_key != api_key and not decrypted_key.startswith('ENC:'): + # Create a copy with decrypted key + fixed_key_data = key_data.copy() + fixed_key_data['api_key'] = decrypted_key + validated_keys.append(fixed_key_data) + encrypted_keys_fixed += 1 + except Exception: + continue + else: + # Key is already decrypted + validated_keys.append(key_data) + + if not validated_keys: + return False + + # Load the validated keys + cls._api_key_pool.load_from_list(validated_keys) + #cls._main_fallback_key = validated_keys[0]['api_key'] + #cls._main_fallback_model = validated_keys[0]['model'] + #print(f"🔑 Using {validated_keys[0]['model']} as main fallback key") + + # FIX 2: Store settings at class level (these affect all instances) + # These are class variables since pool is shared + if not hasattr(cls, '_force_rotation'): + cls._force_rotation = force_rotation + if not hasattr(cls, '_rotation_frequency'): + cls._rotation_frequency = rotation_frequency + + # Or update if provided + cls._force_rotation = force_rotation + cls._rotation_frequency = rotation_frequency + + # Single debug message + if encrypted_keys_fixed > 0: + print(f"🔑 Multi-key pool: {len(validated_keys)} keys loaded ({encrypted_keys_fixed} required decryption fix)") + else: + print(f"🔑 Multi-key pool: {len(validated_keys)} keys loaded") + + return True + + @classmethod + def initialize_key_pool(cls, key_list: list): + """Initialize the shared API key pool (legacy compatibility)""" + with cls._pool_lock: + if cls._api_key_pool is None: + cls._api_key_pool = APIKeyPool() + cls._api_key_pool.load_from_list(key_list) + + @classmethod + def get_key_pool(cls): + """Get the shared API key pool (legacy compatibility)""" + with cls._pool_lock: + if cls._api_key_pool is None: + cls._api_key_pool = APIKeyPool() + return cls._api_key_pool + + def _get_max_retries(self) -> int: + """Get max retry count from environment variable, default to 7""" + return int(os.getenv('MAX_RETRIES', '7')) + + # Class-level cancellation flag for all instances + _global_cancelled = False + _global_cancel_lock = threading.RLock() + + @classmethod + def set_global_cancellation(cls, cancelled: bool): + """Set global cancellation flag for all client instances""" + with cls._global_cancel_lock: + cls._global_cancelled = cancelled + + @classmethod + def is_globally_cancelled(cls) -> bool: + """Check if globally cancelled""" + with cls._global_cancel_lock: + return cls._global_cancelled + + def __init__(self, api_key: str, model: str, output_dir: str = "Output"): + """Initialize the unified client with enhanced thread safety""" + # Store original values + self.original_api_key 
= api_key + self.original_model = model + + self._sequential_send_lock = threading.Lock() + + # Thread submission timing controls + self._thread_submission_lock = threading.Lock() + self._last_thread_submission_time = 0 + self._thread_submission_count = 0 + + # Add unique session ID for this client instance + self.session_id = str(uuid.uuid4())[:8] + + # INSTANCE-LEVEL multi-key configuration + self._multi_key_mode = False # INSTANCE variable, not class! + self._force_rotation = True + self._rotation_frequency = 1 + + # Instance variables + self.output_dir = output_dir + self._cancelled = False + self._in_cleanup = False + self.conversation_message_count = 0 + self.context = None + self.current_session_context = None + + # Request tracking (from first init) + self._request_count = 0 + self._thread_request_count = 0 + + # Thread coordination for key assignment + self._key_assignment_lock = RLock() + self._thread_key_assignments = {} # {thread_id: (key_index, timestamp)} + self._instance_model_lock = threading.RLock() + + # Thread-local storage for client instances + self._thread_local = threading.local() + + # Thread-specific request counters + self._thread_request_counters = defaultdict(int) + self._counter_lock = RLock() + + # File write coordination + self._file_write_locks = {} # {filepath: RLock} + self._file_write_locks_lock = RLock() + if not hasattr(self, '_instance_model_lock'): + self._instance_model_lock = threading.RLock() + + # Stats tracking + self.stats = { + 'total_requests': 0, + 'successful_requests': 0, + 'failed_requests': 0, + 'errors': defaultdict(int), + 'response_times': [], + 'empty_results': 0 # Add this for completeness + } + + # Pattern recognition attributes + self.pattern_counts = {} # Track pattern frequencies for reinforcement + self.last_pattern = None # Track last seen pattern + + # Call reset_stats if it exists (from first init) + if hasattr(self, 'reset_stats'): + self.reset_stats() + + # File tracking for duplicate prevention + self._active_files = set() # Track files being written + self._file_lock = RLock() + + # Timeout configuration + enabled, window = self._get_timeout_config() + self.request_timeout = int(os.getenv("CHUNK_TIMEOUT", "900")) if enabled else 36000 # 10 hours + + # Initialize client references + self.api_key = api_key + self.model = model + self.key_identifier = "Single Key" + self.current_key_index = None + self.openai_client = None + self.gemini_client = None + self.mistral_client = None + self.cohere_client = None + self._actual_output_filename = None + self._current_output_file = None + self._last_response_filename = None + + + # Store Google Cloud credentials path if available + self.google_creds_path = None + # Store current key's Google credentials, Azure endpoint, and Google region + self.current_key_google_creds = None + self.current_key_azure_endpoint = None + self.current_key_google_region = None + + # Azure-specific flags + self.is_azure = False + self.azure_endpoint = None + self.azure_api_version = None + + self.translator_config = { + 'use_fallback_keys': os.getenv('USE_FALLBACK_KEYS', '0') == '1', + 'fallback_keys': json.loads(os.getenv('FALLBACK_KEYS', '[]')) + } + + # Debug print to verify + if self.translator_config['use_fallback_keys']: + num_fallbacks = len(self.translator_config['fallback_keys']) + print(f"🔑 Fallback keys loaded: {num_fallbacks} keys") + + # Check if multi-key mode should be enabled FOR THIS INSTANCE + use_multi_keys_env = os.getenv('USE_MULTI_API_KEYS', '0') == '1' + print(f"[DEBUG] 
USE_MULTI_API_KEYS env var: {os.getenv('USE_MULTI_API_KEYS')}") + print(f"[DEBUG] Creating new instance - multi-key mode from env: {use_multi_keys_env}") + + if use_multi_keys_env: + # Initialize from environment + multi_keys_json = os.getenv('MULTI_API_KEYS', '[]') + print(f"[DEBUG] Loading multi-keys config...") + force_rotation = os.getenv('FORCE_KEY_ROTATION', '1') == '1' + rotation_frequency = int(os.getenv('ROTATION_FREQUENCY', '1')) + + try: + multi_keys = json.loads(multi_keys_json) + if multi_keys: + # Setup the shared pool + self.setup_multi_key_pool(multi_keys, force_rotation, rotation_frequency) + + # Enable multi-key mode FOR THIS INSTANCE + self._multi_key_mode = True + self._force_rotation = force_rotation + self._rotation_frequency = rotation_frequency + + print(f"[DEBUG] ✅ This instance has multi-key mode ENABLED") + else: + print(f"[DEBUG] ❌ No keys found in config, staying in single-key mode") + self._multi_key_mode = False + except Exception as e: + print(f"Failed to load multi-key config: {e}") + self._multi_key_mode = False + print(f"[DEBUG] ❌ Error loading config, falling back to single-key mode") + else: + #print(f"[DEBUG] ❌ Multi-key mode is DISABLED for this instance (env var = 0)") + self._multi_key_mode = False + + # Initial setup based on THIS INSTANCE's mode + if not self._multi_key_mode: + self.api_key = api_key + self.model = model + self.key_identifier = "Single Key" + self._setup_client() + + # Check for Vertex AI Model Garden models (contain @ symbol) + # NOTE: This happens AFTER the initial setup, as in the second version + if '@' in self.model or self.model.startswith('vertex/'): + # For Vertex AI, we need Google Cloud credentials, not API key + self.client_type = 'vertex_model_garden' + + # Try to find Google Cloud credentials + # 1. Check environment variable + self.google_creds_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') + + # 2. Check if passed as api_key (for compatibility) + if not self.google_creds_path and api_key and os.path.exists(api_key): + self.google_creds_path = api_key + # Use logger if available, otherwise print + if hasattr(self, 'logger'): + self.logger.info("Using API key parameter as Google Cloud credentials path") + else: + print("Using API key parameter as Google Cloud credentials path") + + # 3. 
Will check GUI config later during send if needed + + if self.google_creds_path: + msg = f"Vertex AI Model Garden: Using credentials from {self.google_creds_path}" + if hasattr(self, 'logger'): + self.logger.info(msg) + else: + print(msg) + else: + print("Vertex AI Model Garden: Google Cloud credentials not yet configured") + else: + # Only set up client if not in multi-key mode + # Multi-key mode will set up the client when a key is selected + if not self._multi_key_mode: + # NOTE: This is a SECOND call to _setup_client() in the else branch + # Determine client type from model name + self._setup_client() + print(f"[DEBUG] After setup - client_type: {getattr(self, 'client_type', None)}, openai_client: {self.openai_client}") + + # FORCE OPENAI CLIENT IF CUSTOM BASE URL IS SET AND ENABLED + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + + # Force OpenAI client when custom endpoint is enabled + if custom_base_url and use_custom_endpoint and self.openai_client is None: + original_client_type = self.client_type + print(f"[DEBUG] Custom base URL detected and enabled, overriding {original_client_type or 'unmatched'} model to use OpenAI client: {self.model}") + self.client_type = 'openai' + + # Check if openai module is available + try: + import openai + except ImportError: + raise ImportError("OpenAI library not installed. Install with: pip install openai") + + # Validate URL has protocol + if not custom_base_url.startswith(('http://', 'https://')): + print(f"[WARNING] Custom base URL missing protocol, adding https://") + custom_base_url = 'https://' + custom_base_url + + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=custom_base_url + ) + print(f"[DEBUG] OpenAI client created with custom base URL: {custom_base_url}") + elif custom_base_url and not use_custom_endpoint: + print(f"[DEBUG] Custom base URL detected but disabled via toggle, using standard client") + + def _apply_thread_submission_delay(self): + # Get threading delay from environment (default 0.5) + thread_delay = float(os.getenv("THREAD_SUBMISSION_DELAY_SECONDS", "0.5")) + + if thread_delay <= 0: + return + + sleep_time = 0 + should_log = False + log_message = "" + + # HOLD LOCK ONLY BRIEFLY to check timing and update counter + with self._thread_submission_lock: + current_time = time.time() + time_since_last_submission = current_time - self._last_thread_submission_time + + if time_since_last_submission < thread_delay: + sleep_time = thread_delay - time_since_last_submission + # Update the timestamp NOW while we have the lock + self._last_thread_submission_time = time.time() + + # Determine if we should log (but don't log yet) + if self._thread_submission_count < 3: + should_log = True + log_message = f"🧵 [{threading.current_thread().name}] Thread delay: {sleep_time:.1f}s" + elif self._thread_submission_count == 3: + should_log = True + log_message = f"🧵 [Subsequent thread delays: {thread_delay}s each...]" + + self._thread_submission_count += 1 + # LOCK RELEASED HERE + + # NOW do the sleep OUTSIDE the lock + if sleep_time > 0: + if should_log: + print(log_message) + + # Interruptible sleep + elapsed = 0 + check_interval = 0.1 + while elapsed < sleep_time: + if self._cancelled: + print(f"🛑 Threading delay cancelled") + return # Exit early if cancelled + + time.sleep(min(check_interval, sleep_time - elapsed)) + elapsed += check_interval + + def _get_thread_local_client(self): + """Get or create thread-local client""" + 
thread_id = threading.current_thread().ident + + # Check if we need a new client for this thread + if not hasattr(self._thread_local, 'initialized'): + self._thread_local.initialized = False + self._thread_local.api_key = None + self._thread_local.model = None + self._thread_local.key_index = None + self._thread_local.key_identifier = None + self._thread_local.request_count = 0 + self._thread_local.openai_client = None + self._thread_local.gemini_client = None + self._thread_local.mistral_client = None + self._thread_local.cohere_client = None + self._thread_local.client_type = None + self._thread_local.current_request_label = None + + # THREAD-LOCAL CACHE + self._thread_local.request_cache = {} # Each thread gets its own cache! + self._thread_local.cache_hits = 0 + self._thread_local.cache_misses = 0 + + return self._thread_local + + def _ensure_thread_client(self): + """Ensure the current thread has a properly initialized client with thread safety""" + # Check if cancelled before proceeding + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + + tls = self._get_thread_local_client() + thread_name = threading.current_thread().name + thread_id = threading.current_thread().ident + + # Multi-key mode + if self._multi_key_mode: + # Check if we need to rotate + should_rotate = False + + if not tls.initialized: + should_rotate = True + print(f"[Thread-{thread_name}] Initializing with multi-key mode") + elif self._force_rotation: + tls.request_count = getattr(tls, 'request_count', 0) + 1 + if tls.request_count >= self._rotation_frequency: + should_rotate = True + tls.request_count = 0 + print(f"[Thread-{thread_name}] Rotating key (reached {self._rotation_frequency} requests)") + + if should_rotate: + # Release previous thread assignment to avoid stale usage tracking + if hasattr(self._api_key_pool, 'release_thread_assignment'): + try: + self._api_key_pool.release_thread_assignment(thread_id) + except Exception: + pass + + # Get a key using thread-safe method with timeout + key_info = None + + # Add timeout protection for key retrieval + start_time = time.time() + max_wait = 120 # 120 seconds max to get a key + + # First try using the pool's method if available + if hasattr(self._api_key_pool, 'get_key_for_thread'): + try: + key_info = self._api_key_pool.get_key_for_thread( + force_rotation=should_rotate, + rotation_frequency=self._rotation_frequency + ) + if key_info: + key, key_index, key_id = key_info + # Convert to tuple format expected below + key_info = (key, key_index) + except Exception as e: + print(f"[Thread-{thread_name}] Error getting key from pool: {e}") + key_info = None + + # Fallback to our method with timeout check + if not key_info: + if time.time() - start_time > max_wait: + raise UnifiedClientError(f"Timeout getting key for thread after {max_wait}s", error_type="timeout") + key_info = self._get_next_available_key_for_thread() + + if key_info: + key, key_index = key_info[:2] # Handle both tuple formats + + # Generate key identifier + key_id = f"Key#{key_index+1} ({key.model})" + if hasattr(key, 'identifier') and key.identifier: + key_id = key.identifier + + # Update thread-local state (no lock needed, thread-local is safe) + tls.api_key = key.api_key + tls.model = key.model + tls.key_index = key_index + tls.key_identifier = key_id + tls.google_credentials = getattr(key, 'google_credentials', None) + tls.azure_endpoint = getattr(key, 'azure_endpoint', None) + tls.azure_api_version = getattr(key, 'azure_api_version', None) + 
tls.google_region = getattr(key, 'google_region', None) + tls.use_individual_endpoint = getattr(key, 'use_individual_endpoint', False) + tls.initialized = True + tls.last_rotation = time.time() + + # MICROSECOND LOCK: Only when copying to instance variables + with self._model_lock: + # Copy to instance for compatibility + self.api_key = tls.api_key + self.model = tls.model + self.key_identifier = tls.key_identifier + self.current_key_index = key_index + self.current_key_google_creds = tls.google_credentials + self.current_key_azure_endpoint = tls.azure_endpoint + self.current_key_azure_api_version = tls.azure_api_version + self.current_key_google_region = tls.google_region + self.current_key_use_individual_endpoint = tls.use_individual_endpoint + + # Log key assignment - FIX: Add None check for api_key + if self.api_key and len(self.api_key) > 12: + masked_key = self.api_key[:4] + "..." + self.api_key[-4:] + elif self.api_key and len(self.api_key) > 5: + masked_key = self.api_key[:3] + "..." + self.api_key[-2:] + else: + masked_key = "***" + + print(f"[Thread-{thread_name}] 🔑 Using {self.key_identifier} - {masked_key}") + + # Setup client with new key (might need lock if it modifies instance state) + self._setup_client() + + # CRITICAL FIX: Apply individual key's Azure endpoint like single-key mode does + self._apply_individual_key_endpoint_if_needed() + return + else: + # No keys available + raise UnifiedClientError("No available API keys for thread", error_type="no_keys") + else: + # Not rotating, ensure instance variables match thread-local + if tls.initialized: + # MICROSECOND LOCK: When syncing instance variables + with self._model_lock: + self.api_key = tls.api_key + self.model = tls.model + self.key_identifier = tls.key_identifier + self.current_key_index = getattr(tls, 'key_index', None) + self.current_key_google_creds = getattr(tls, 'google_credentials', None) + self.current_key_azure_endpoint = getattr(tls, 'azure_endpoint', None) + self.current_key_azure_api_version = getattr(tls, 'azure_api_version', None) + self.current_key_google_region = getattr(tls, 'google_region', None) + self.current_key_use_individual_endpoint = getattr(tls, 'use_individual_endpoint', False) + + # Single key mode + elif not tls.initialized: + tls.api_key = self.original_api_key + tls.model = self.original_model + tls.key_identifier = "Single Key" + tls.initialized = True + tls.request_count = 0 + + # MICROSECOND LOCK: When setting instance variables + with self._model_lock: + self.api_key = tls.api_key + self.model = tls.model + self.key_identifier = tls.key_identifier + + logger.debug(f"[Thread-{thread_name}] Single-key mode: Using {self.model}") + self._setup_client() + + def _get_thread_key(self) -> Optional[Tuple[str, int]]: + """Get the API key assigned to current thread""" + thread_id = threading.current_thread().ident + + with self._assignment_lock: + if thread_id in self._key_assignments: + return self._key_assignments[thread_id] + + return None + + def _assign_thread_key(self): + """Assign a key to the current thread""" + thread_id = threading.current_thread().ident + thread_name = threading.current_thread().name + + # Check if cancelled at start + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + + # Check if thread already has a key + existing = self._get_thread_key() + if existing and not self._should_rotate_thread_key(): + # Thread already has a key and doesn't need rotation + key_index, key_identifier = existing + self.current_key_index = 
key_index + self.key_identifier = key_identifier + + # Apply the key settings + if key_index < len(self._api_key_pool.keys): + key = self._api_key_pool.keys[key_index] + self.api_key = key.api_key + self.model = key.model + return + + # Get next available key for this thread + max_retries = self._get_max_retries() + retry_count = 0 + + while retry_count <= max_retries: + with self._pool_lock: + key_info = self._get_next_available_key_for_thread() + if key_info: + key, key_index = key_info + self.api_key = key.api_key + self.model = key.model + self.current_key_index = key_index + self.key_identifier = f"Key#{key_index+1} ({self.model})" + + # Store assignment + with self._assignment_lock: + self._key_assignments[thread_id] = (key_index, self.key_identifier) + + # FIX: Add None check for api_key + if self.api_key and len(self.api_key) > 12: + masked_key = self.api_key[:8] + "..." + self.api_key[-4:] + else: + masked_key = self.api_key or "***" + print(f"[THREAD-{thread_name}] 🔑 Assigned {self.key_identifier} - {masked_key}") + + # Setup client for this key + self._setup_client() + self._apply_custom_endpoint_if_needed() + print(f"[THREAD-{thread_name}] 🔄 Key assignment: Client setup completed, ready for requests...") + time.sleep(0.1) # Brief pause after key assignment for stability + return + + # No key available - all are on cooldown + if retry_count < max_retries: + wait_time = self._get_shortest_cooldown_time() + print(f"[THREAD-{thread_name}] No keys available, waiting {wait_time}s (retry {retry_count + 1}/{max_retries})") + + # Wait with cancellation check + for i in range(wait_time): + if hasattr(self, '_cancelled') and self._cancelled: + raise UnifiedClientError("Operation cancelled while waiting for key", error_type="cancelled") + time.sleep(1) + if i % 10 == 0 and i > 0: + print(f"[THREAD-{thread_name}] Still waiting... 
{wait_time - i}s remaining") + + # Clear expired entries before next attempt + if hasattr(self, '_rate_limit_cache') and self._rate_limit_cache: + self._rate_limit_cache.clear_expired() + print(f"[THREAD-{thread_name}] 🔄 Cooldown wait: Cache cleared, attempting next key assignment...") + time.sleep(0.1) # Brief pause after cooldown wait for retry stability + + retry_count += 1 + + # If we've exhausted all retries, raise error + raise UnifiedClientError(f"No available API keys for thread after {max_retries} retries", error_type="no_keys") + + def _get_next_available_key_for_thread(self) -> Optional[Tuple]: + """Get next available key for thread assignment with proper thread safety""" + if not self._api_key_pool: + return None + + thread_name = threading.current_thread().name + + # Stop check + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + + # Use the APIKeyPool's built-in thread-safe method + if hasattr(self._api_key_pool, 'get_key_for_thread'): + # Let the pool handle all the thread assignment logic + key_info = self._api_key_pool.get_key_for_thread( + force_rotation=getattr(self, '_force_rotation', True), + rotation_frequency=getattr(self, '_rotation_frequency', 1) + ) + + if key_info: + key, key_index, key_id = key_info + print(f"[{thread_name}] Got {key_id} from pool") + return (key, key_index) + else: + # Pool couldn't provide a key, all are on cooldown + print(f"[{thread_name}] No keys available from pool") + return None + + # Fallback: If pool doesn't have the method, use simpler logic + print("APIKeyPool missing get_key_for_thread method, using fallback") + + with self.__class__._pool_lock: + # Simple round-robin without complex thread tracking + for _ in range(len(self._api_key_pool.keys)): + current_idx = getattr(self._api_key_pool, 'current_index', 0) + + # Ensure index is valid + if current_idx >= len(self._api_key_pool.keys): + current_idx = 0 + self._api_key_pool.current_index = 0 + + key = self._api_key_pool.keys[current_idx] + key_id = f"Key#{current_idx+1} ({key.model})" + + # Advance index for next call + self._api_key_pool.current_index = (current_idx + 1) % len(self._api_key_pool.keys) + + # Check availability + if key.is_available() and not self._rate_limit_cache.is_rate_limited(key_id): + print(f"[{thread_name}] Assigned {key_id} (fallback)") + return (key, current_idx) + + # No available keys + print(f"[{thread_name}] All keys unavailable in fallback") + return None + + def _wait_for_available_key(self) -> Optional[Tuple]: + """Wait for a key to become available (called outside lock)""" + thread_name = threading.current_thread().name + + # Check if cancelled first + if self._cancelled: + if not self._is_stop_requested(): + logger.info(f"[Thread-{thread_name}] Operation cancelled, not waiting for key") + return None + + # Get shortest cooldown time with timeout protection + wait_time = self._get_shortest_cooldown_time() + + # Cap maximum wait time to prevent infinite waits + max_wait_time = 120 # 2 minutes max + if wait_time > max_wait_time: + print(f"[Thread-{thread_name}] Cooldown time {wait_time}s exceeds max {max_wait_time}s") + wait_time = max_wait_time + + if wait_time <= 0: + # Keys should be available now + with self.__class__._pool_lock: + for i, key in enumerate(self._api_key_pool.keys): + key_id = f"Key#{i+1} ({key.model})" + if key.is_available() and not self._rate_limit_cache.is_rate_limited(key_id): + return (key, i) + + print(f"[Thread-{thread_name}] All keys on cooldown. 
Waiting {wait_time}s...") + + # Wait with cancellation check + wait_start = time.time() + while time.time() - wait_start < wait_time: + if self._cancelled: + print(f"[Thread-{thread_name}] Wait cancelled by user") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + # Check every second if a key became available early + with self.__class__._pool_lock: + for i, key in enumerate(self._api_key_pool.keys): + key_id = f"Key#{i+1} ({key.model})" + if key.is_available() and not self._rate_limit_cache.is_rate_limited(key_id): + print(f"[Thread-{thread_name}] Key became available early: {key_id}") + print(f"[Thread-{thread_name}] 🔄 Early key availability: Key ready for immediate use...") + time.sleep(0.1) # Brief pause after early detection for stability + return (key, i) + + time.sleep(1) + + # Progress indicator + elapsed = int(time.time() - wait_start) + if elapsed % 10 == 0 and elapsed > 0: + remaining = wait_time - elapsed + print(f"[Thread-{thread_name}] Still waiting... {remaining}s remaining") + + # Clear expired entries from cache + self._rate_limit_cache.clear_expired() + + # Final attempt after wait + with self.__class__._pool_lock: + # Try to find an available key + for i, key in enumerate(self._api_key_pool.keys): + key_id = f"Key#{i+1} ({key.model})" + if key.is_available() and not self._rate_limit_cache.is_rate_limited(key_id): + return (key, i) + + # Still no keys? Return the first enabled one (last resort) + for i, key in enumerate(self._api_key_pool.keys): + if key.enabled: + print(f"[Thread-{thread_name}] WARNING: Using potentially rate-limited key as last resort") + return (key, i) + + return None + + def _should_rotate_thread_key(self) -> bool: + """Check if current thread should rotate its key""" + if not self._force_rotation: + return False + + # Check thread-local request count + if not hasattr(self._thread_local, 'request_count'): + self._thread_local.request_count = 0 + + self._thread_local.request_count += 1 + + if self._thread_local.request_count >= self._rotation_frequency: + self._thread_local.request_count = 0 + return True + + return False + + def _handle_rate_limit_for_thread(self): + """Handle rate limit by marking current thread's key and getting a new one (thread-safe)""" + if not self._multi_key_mode: # Check INSTANCE variable + return + + thread_id = threading.current_thread().ident + thread_name = threading.current_thread().name + + # Get thread-local state first (thread-safe by nature) + tls = self._get_thread_local_client() + + # Store the current key info before we change anything + current_key_index = None + current_key_identifier = None + + # Safely get current key information from thread-local storage + if hasattr(tls, 'key_index') and tls.key_index is not None: + current_key_index = tls.key_index + current_key_identifier = getattr(tls, 'key_identifier', f"Key#{current_key_index+1}") + elif hasattr(self, 'current_key_index') and self.current_key_index is not None: + # Fallback to instance variable if thread-local not set + current_key_index = self.current_key_index + current_key_identifier = self.key_identifier + + # Mark the current key as rate limited (if we have one) + if current_key_index is not None and self._api_key_pool: + # Use the pool's thread-safe method to mark the error + self._api_key_pool.mark_key_error(current_key_index, 429) + + # Get cooldown value safely + cooldown = 60 # Default + with self.__class__._pool_lock: + if current_key_index < len(self._api_key_pool.keys): + key = 
self._api_key_pool.keys[current_key_index] + cooldown = getattr(key, 'cooldown', 60) + + print(f"[THREAD-{thread_name}] 🕐 Marking {current_key_identifier} for cooldown ({cooldown}s)") + + # Add to rate limit cache (this is already thread-safe) + if hasattr(self.__class__, '_rate_limit_cache') and self.__class__._rate_limit_cache: + self.__class__._rate_limit_cache.add_rate_limit(current_key_identifier, cooldown) + + # Clear thread-local state to force new key assignment + tls.initialized = False + tls.api_key = None + tls.model = None + tls.key_index = None + tls.key_identifier = None + tls.request_count = 0 + + # Remove any legacy assignments (thread-safe with lock) + if hasattr(self, '_assignment_lock') and hasattr(self, '_key_assignments'): + with self._assignment_lock: + if thread_id in self._key_assignments: + del self._key_assignments[thread_id] + + # Release thread assignment in the pool (if pool supports it) + if hasattr(self._api_key_pool, 'release_thread_assignment'): + self._api_key_pool.release_thread_assignment(thread_id) + + # Now force getting a new key + # This will call _ensure_thread_client which will get a new key + print(f"[THREAD-{thread_name}] 🔄 Requesting new key after rate limit...") + + try: + # Ensure we get a new client with a new key + self._ensure_thread_client() + + # Verify we got a different key + new_key_index = getattr(tls, 'key_index', None) + new_key_identifier = getattr(tls, 'key_identifier', 'Unknown') + + if new_key_index != current_key_index: + print(f"[THREAD-{thread_name}] ✅ Successfully rotated from {current_key_identifier} to {new_key_identifier}") + else: + print(f"[THREAD-{thread_name}] ⚠️ Warning: Got same key back: {new_key_identifier}") + + except Exception as e: + print(f"[THREAD-{thread_name}] ❌ Failed to get new key after rate limit: {e}") + raise UnifiedClientError(f"Failed to rotate key after rate limit: {e}", error_type="no_keys") + + # Helper methods that need to check instance state + def _count_available_keys(self) -> int: + """Count how many keys are currently available""" + if not self._multi_key_mode or not self.__class__._api_key_pool: + return 0 + + count = 0 + for i, key in enumerate(self.__class__._api_key_pool.keys): + if key.enabled: + key_id = f"Key#{i+1} ({key.model})" + # Check both rate limit cache AND key's own cooling status + is_rate_limited = self.__class__._rate_limit_cache.is_rate_limited(key_id) + is_cooling = key.is_cooling_down # Also check the key's own status + + if not is_rate_limited and not is_cooling: + count += 1 + return count + + def _mark_key_success(self): + """Mark the current key as successful (thread-safe)""" + # Check both instance and class-level cancellation + if (hasattr(self, '_cancelled') and self._cancelled) or self.__class__._global_cancelled: + # Don't mark success if we're cancelled + return + + if not self._multi_key_mode: + return + + # Get thread-local state + tls = self._get_thread_local_client() + key_index = getattr(tls, 'key_index', None) + + # Fallback to instance variable if thread-local not set + if key_index is None: + key_index = getattr(self, 'current_key_index', None) + + if key_index is not None and self.__class__._api_key_pool: + # Use the pool's thread-safe method + self.__class__._api_key_pool.mark_key_success(key_index) + + def _mark_key_error(self, error_code: int = None): + """Mark current key as having an error and apply cooldown if rate limited (thread-safe)""" + # Check both instance and class-level cancellation + if (hasattr(self, '_cancelled') and 
self._cancelled) or self.__class__._global_cancelled: + # Don't mark error if we're cancelled + return + + if not self._multi_key_mode: + return + + # Get thread-local state + tls = self._get_thread_local_client() + key_index = getattr(tls, 'key_index', None) + + # Fallback to instance variable if thread-local not set + if key_index is None: + key_index = getattr(self, 'current_key_index', None) + + if key_index is not None and self.__class__._api_key_pool: + # Use the pool's thread-safe method + self.__class__._api_key_pool.mark_key_error(key_index, error_code) + + # If it's a rate limit error, also add to rate limit cache + if error_code == 429: + # Get key identifier safely + with self.__class__._pool_lock: + if key_index < len(self.__class__._api_key_pool.keys): + key = self.__class__._api_key_pool.keys[key_index] + key_id = f"Key#{key_index+1} ({key.model})" + cooldown = getattr(key, 'cooldown', 60) + + # Add to rate limit cache (already thread-safe) + if hasattr(self.__class__, '_rate_limit_cache'): + self.__class__._rate_limit_cache.add_rate_limit(key_id, cooldown) + + def _apply_custom_endpoint_if_needed(self): + """Apply custom endpoint configuration if needed""" + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + + if custom_base_url and use_custom_endpoint: + if not custom_base_url.startswith(('http://', 'https://')): + custom_base_url = 'https://' + custom_base_url + + # Don't override Gemini models - they have their own separate endpoint toggle + if self.client_type == 'gemini': + # Only log if Gemini OpenAI endpoint is not also enabled + use_gemini_endpoint = os.getenv("USE_GEMINI_OPENAI_ENDPOINT", "0") == "1" + if not use_gemini_endpoint: + self._log_once("Gemini model detected, not overriding with custom OpenAI endpoint (use USE_GEMINI_OPENAI_ENDPOINT instead)", is_debug=True) + return + + # Override other model types to use OpenAI client when custom endpoint is enabled + original_client_type = self.client_type + self.client_type = 'openai' + + try: + import openai + # MICROSECOND LOCK: Create custom endpoint client with thread safety + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=custom_base_url + ) + except ImportError: + print(f"[ERROR] OpenAI library not installed, cannot use custom endpoint") + self.client_type = original_client_type # Restore original type + + def _apply_individual_key_endpoint_if_needed(self): + """Apply individual key endpoint if configured (multi-key mode) - works independently of global toggle""" + # Check if this key has an individual endpoint enabled AND configured + has_individual_endpoint = (hasattr(self, 'current_key_azure_endpoint') and + hasattr(self, 'current_key_use_individual_endpoint') and + self.current_key_use_individual_endpoint and + self.current_key_azure_endpoint) + + if has_individual_endpoint: + # Use individual endpoint - works independently of global custom endpoint toggle + individual_endpoint = self.current_key_azure_endpoint + + if not individual_endpoint.startswith(('http://', 'https://')): + individual_endpoint = 'https://' + individual_endpoint + + # Don't override Gemini models - they have their own separate endpoint toggle + if self.client_type == 'gemini': + # Only log if Gemini OpenAI endpoint is not also enabled + use_gemini_endpoint = os.getenv("USE_GEMINI_OPENAI_ENDPOINT", "0") == "1" + if not use_gemini_endpoint: + self._log_once("Gemini model detected, not overriding with 
individual endpoint (use USE_GEMINI_OPENAI_ENDPOINT instead)", is_debug=True) + return + + # Detect Azure endpoints and route via Azure handler instead of generic OpenAI base_url + url_l = individual_endpoint.lower() + is_azure = (".openai.azure.com" in url_l) or (".cognitiveservices" in url_l) or ("/openai/deployments/" in url_l) + if is_azure: + # Normalize to plain Azure base (strip any trailing /openai/... if present) + azure_base = individual_endpoint.split('/openai')[0] if '/openai' in individual_endpoint else individual_endpoint.rstrip('/') + with self._model_lock: + # Switch this instance to Azure mode for correct routing + self.client_type = 'azure' + self.azure_endpoint = azure_base + # Prefer per-key Azure API version if available + self.azure_api_version = getattr(self, 'current_key_azure_api_version', None) or os.getenv('AZURE_API_VERSION', '2024-02-01') + # Mark that we applied an individual (per-key) endpoint + self._individual_endpoint_applied = True + # Also update TLS so subsequent calls on this thread know it's Azure + try: + tls = self._get_thread_local_client() + with self._model_lock: + tls.azure_endpoint = azure_base + tls.azure_api_version = self.azure_api_version + tls.client_type = 'azure' + except Exception: + pass + print(f"[DEBUG] Individual Azure endpoint applied: {azure_base} (api-version={self.azure_api_version})") + return # Handled; do not fall through to custom endpoint logic + + # Non-Azure: Override to use OpenAI-compatible client against the provided base URL + original_client_type = self.client_type + self.client_type = 'openai' + + try: + import openai + + # MICROSECOND LOCK: Create individual endpoint client with thread safety + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=individual_endpoint + ) + + # Set flag to prevent _setup_client from overriding this client + self._individual_endpoint_applied = True + + # CRITICAL: Update thread-local storage with our correct client + tls = self._get_thread_local_client() + with self._model_lock: + tls.openai_client = self.openai_client + tls.client_type = 'openai' + + return # Individual endpoint applied - don't check global custom endpoint + except ImportError: + self.client_type = original_client_type # Restore original type + return + except Exception as e: + print(f"[ERROR] Failed to create individual endpoint client: {e}") + self.client_type = original_client_type # Restore original type + return + + # If no individual endpoint, check global custom endpoint (but only if global toggle is enabled) + self._apply_custom_endpoint_if_needed() + + # Properties for backward compatibility + @property + def use_multi_keys(self): + """Property for backward compatibility""" + return self._multi_key_mode + + @use_multi_keys.setter + def use_multi_keys(self, value): + """Property setter for backward compatibility""" + self._multi_key_mode = value + + def _ensure_key_rotation(self): + """Ensure we have a key selected and rotate if in multi-key mode""" + if not self.use_multi_keys: + return + + # Force rotation to next key on every request + if self.current_key_index is not None: + # We already have a key, rotate to next + print(f"[DEBUG] Rotating from {self.key_identifier} to next key") + self._force_next_key() + else: + # First request, get initial key + print(f"[DEBUG] First request, selecting initial key") + key_info = self._get_next_available_key() + if key_info: + self._apply_key_change(key_info, "Initial") + else: + raise UnifiedClientError("No available API 
keys", error_type="no_keys") + + def _force_next_key(self): + """Force rotation to the next key in the pool""" + if not self.use_multi_keys or not self._api_key_pool: + return + + old_key_identifier = self.key_identifier + + # Use force_rotate method to always get next key + key_info = self._api_key_pool.force_rotate_to_next_key() + if key_info: + # Check if it's available + if not key_info[0].is_available(): + print(f"[WARNING] Next key in rotation is on cooldown, but using it anyway") + + self._apply_key_change(key_info, old_key_identifier) + print(f"🔄 Force key rotation: Key change completed, system ready...") + time.sleep(0.5) # Brief pause after force rotation for system stability + else: + print(f"[ERROR] Failed to rotate to next key") + + def _rotate_to_next_key(self) -> bool: + """Rotate to the next available key and reinitialize client - THREAD SAFE""" + if not self.use_multi_keys or not self._api_key_pool: + return False + + old_key_identifier = self.key_identifier + + key_info = self._get_next_available_key() + if key_info: + # MICROSECOND LOCK: Protect all instance variable modifications + with self._model_lock: + # Update key and model + self.api_key = key_info[0].api_key + self.model = key_info[0].model + self.current_key_index = key_info[1] + + # Update key identifier + self.key_identifier = f"Key#{key_info[1]+1} ({self.model})" + + # Reset clients (these are instance variables too!) + self.openai_client = None + self.gemini_client = None + self.mistral_client = None + self.cohere_client = None + + # Logging (outside lock - just reading) - FIX: Add None check + if self.api_key and len(self.api_key) > 12: + masked_key = self.api_key[:8] + "..." + self.api_key[-4:] + else: + masked_key = self.api_key or "***" + print(f"[DEBUG] 🔄 Rotating from {old_key_identifier} to {self.key_identifier} - {masked_key}") + + # Re-setup the client with new key + self._setup_client() + + # Re-apply individual endpoint if needed (this takes priority over global custom endpoint) + self._apply_individual_key_endpoint_if_needed() + + print(f"🔄 Key rotation: Endpoint setup completed, rotation successful...") + time.sleep(0.5) # Brief pause after rotation for system stability + return True + + print(f"[WARNING] No available keys to rotate to") + return False + + def get_stats(self) -> Dict[str, any]: + """Get statistics about API usage""" + stats = dict(self.stats) + + # Add multi-key stats if in multi-key mode + if self._multi_key_mode: # Use instance variable + stats['multi_key_enabled'] = True + stats['force_rotation'] = self._force_rotation # Use instance variable + stats['rotation_frequency'] = self._rotation_frequency # Use instance variable + + if hasattr(self, '_api_key_pool') and self._api_key_pool: + stats['total_keys'] = len(self._api_key_pool.keys) + stats['active_keys'] = sum(1 for k in self._api_key_pool.keys if k.enabled and k.is_available()) + stats['keys_on_cooldown'] = sum(1 for k in self._api_key_pool.keys if k.is_cooling_down) + + # Per-key stats + key_stats = [] + for i, key in enumerate(self._api_key_pool.keys): + key_stat = { + 'index': i, + 'model': key.model, + 'enabled': key.enabled, + 'available': key.is_available(), + 'success_count': key.success_count, + 'error_count': key.error_count, + 'cooling_down': key.is_cooling_down + } + key_stats.append(key_stat) + stats['key_details'] = key_stats + else: + stats['multi_key_enabled'] = False + + return stats + + def diagnose_custom_endpoint(self) -> Dict[str, Any]: + """Diagnose custom endpoint configuration for troubleshooting""" 
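+        # Hedged usage sketch (assumes an already-constructed UnifiedClient bound to `client`;
+        # that variable name is illustrative, not part of this module):
+        #
+        #     report = client.diagnose_custom_endpoint()
+        #     if report['status'] == 'ISSUES_FOUND':
+        #         for issue in report['issues']:
+        #             print(f"Endpoint issue: {issue}")
+        #
+        # The returned dict also exposes 'environment_variables', 'client_status' and
+        # 'custom_endpoint_analysis' sub-dicts for programmatic checks.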
+ diagnosis = { + 'timestamp': datetime.now().isoformat(), + 'model': self.model, + 'client_type': getattr(self, 'client_type', None), + 'multi_key_mode': getattr(self, '_multi_key_mode', False), + 'environment_variables': { + 'USE_CUSTOM_OPENAI_ENDPOINT': os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', 'not_set'), + 'OPENAI_CUSTOM_BASE_URL': os.getenv('OPENAI_CUSTOM_BASE_URL', 'not_set'), + 'OPENAI_API_BASE': os.getenv('OPENAI_API_BASE', 'not_set'), + }, + 'client_status': { + 'openai_client_exists': hasattr(self, 'openai_client') and self.openai_client is not None, + 'gemini_client_exists': hasattr(self, 'gemini_client') and self.gemini_client is not None, + 'current_api_key_length': len(self.api_key) if hasattr(self, 'api_key') and self.api_key else 0, + } + } + + # Check if custom endpoint should be applied + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + + diagnosis['custom_endpoint_analysis'] = { + 'toggle_enabled': use_custom_endpoint, + 'custom_url_provided': bool(custom_base_url), + 'should_use_custom_endpoint': use_custom_endpoint and bool(custom_base_url), + 'would_override_model_type': True, # With our fix, it always overrides now + } + + # Determine if there are any issues + issues = [] + if use_custom_endpoint and not custom_base_url: + issues.append("Custom endpoint enabled but no URL provided in OPENAI_CUSTOM_BASE_URL") + if custom_base_url and not use_custom_endpoint: + issues.append("Custom URL provided but toggle USE_CUSTOM_OPENAI_ENDPOINT is disabled") + if not openai and use_custom_endpoint: + issues.append("OpenAI library not installed - cannot use custom endpoints") + + diagnosis['issues'] = issues + diagnosis['status'] = 'OK' if not issues else 'ISSUES_FOUND' + + return diagnosis + + def print_custom_endpoint_diagnosis(self): + """Print a user-friendly diagnosis of custom endpoint configuration""" + diagnosis = self.diagnose_custom_endpoint() + + print("\n🔍 Custom OpenAI Endpoint Diagnosis:") + print(f" Model: {diagnosis['model']}") + print(f" Client Type: {diagnosis['client_type']}") + print(f" Multi-Key Mode: {diagnosis['multi_key_mode']}") + print("\n📋 Environment Variables:") + for key, value in diagnosis['environment_variables'].items(): + print(f" {key}: {value}") + + print("\n🔧 Custom Endpoint Analysis:") + analysis = diagnosis['custom_endpoint_analysis'] + print(f" Toggle Enabled: {analysis['toggle_enabled']}") + print(f" Custom URL Provided: {analysis['custom_url_provided']}") + print(f" Should Use Custom Endpoint: {analysis['should_use_custom_endpoint']}") + + if diagnosis['issues']: + print("\n⚠️ Issues Found:") + for issue in diagnosis['issues']: + print(f" • {issue}") + else: + print("\n✅ No configuration issues detected") + + print(f"\n📊 Status: {diagnosis['status']}\n") + + return diagnosis + + def reset_stats(self): + """Reset usage statistics and pattern tracking""" + self.stats = { + 'total_requests': 0, + 'successful_requests': 0, + 'failed_requests': 0, + 'errors': defaultdict(int), + 'response_times': [], + 'empty_results': 0 + } + + # Reset pattern tracking + self.pattern_counts = {} + self.last_pattern = None + + # Reset conversation tracking if not already set + if not hasattr(self, 'conversation_message_count'): + self.conversation_message_count = 0 + + # Log if logger is available + if hasattr(self, 'logger'): + self.logger.info("Statistics and pattern tracking reset") + else: + print("Statistics and pattern tracking reset") + + def 
_rotate_to_next_available_key(self, skip_current: bool = False) -> bool: + """ + Rotate to the next available key that's not rate limited + + Args: + skip_current: If True, skip the current key even if it becomes available + """ + if not self._multi_key_mode or not self._api_key_pool: # Use instance variable + return False + + old_key_identifier = self.key_identifier + start_index = self._api_key_pool.current_index + max_attempts = len(self._api_key_pool.keys) + attempts = 0 + + while attempts < max_attempts: + # Get next key from pool + key_info = self._get_next_available_key() + if not key_info: + attempts += 1 + continue + + # Check if this is the same key we started with + potential_key_id = f"Key#{key_info[1]+1} ({key_info[0].model})" + if skip_current and potential_key_id == old_key_identifier: + attempts += 1 + continue + + # Check if this key is rate limited + if not self._rate_limit_cache.is_rate_limited(potential_key_id): + # This key is available, use it + self._apply_key_change(key_info, old_key_identifier) + return True + else: + print(f"[DEBUG] Skipping {potential_key_id} (in cooldown)") + + attempts += 1 + + print(f"[DEBUG] No available keys found after checking all {max_attempts} keys") + + # All keys are on cooldown - wait for shortest cooldown + wait_time = self._get_shortest_cooldown_time() + print(f"[DEBUG] All keys on cooldown. Waiting {wait_time}s...") + + # Wait with cancellation check + for i in range(wait_time): + if hasattr(self, '_cancelled') and self._cancelled: + print(f"[DEBUG] Wait cancelled by user") + return False + time.sleep(1) + if i % 10 == 0 and i > 0: + print(f"[DEBUG] Still waiting... {wait_time - i}s remaining") + + # Clear expired entries and try again + self._rate_limit_cache.clear_expired() + + # Try one more time to find an available key + attempts = 0 + while attempts < max_attempts: + key_info = self._get_next_available_key() + if key_info: + potential_key_id = f"Key#{key_info[1]+1} ({key_info[0].model})" + if not self._rate_limit_cache.is_rate_limited(potential_key_id): + self._apply_key_change(key_info, old_key_identifier) + return True + attempts += 1 + + return False + + def _apply_key_change(self, key_info: tuple, old_key_identifier: str): + """Apply the key change and reinitialize clients""" + # MICROSECOND LOCK: Atomic update of all key-related variables (only under the lock, so other threads never see a half-updated key state) + with self._model_lock: + self.api_key = key_info[0].api_key + self.model = key_info[0].model + self.current_key_index = key_info[1] + self.key_identifier = f"Key#{key_info[1]+1} ({key_info[0].model})" + + # Reset clients atomically + self.openai_client = None + self.gemini_client = None + self.mistral_client = None + self.cohere_client = None + + # Logging OUTSIDE the lock - FIX: Add None check + if self.api_key and len(self.api_key) > 8: + masked_key = self.api_key[:8] + "..." 
+ self.api_key[-4:] + else: + masked_key = self.api_key or "***" + print(f"[DEBUG] 🔄 Switched from {old_key_identifier} to {self.key_identifier}") + + # Reset clients + self.openai_client = None + self.gemini_client = None + self.mistral_client = None + self.cohere_client = None + + # Re-setup the client with new key + self._setup_client() + + # Re-apply custom endpoint if needed + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + + if custom_base_url and use_custom_endpoint and self.client_type == 'openai': + if not custom_base_url.startswith(('http://', 'https://')): + custom_base_url = 'https://' + custom_base_url + + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=custom_base_url + ) + print(f"[DEBUG] Re-created OpenAI client with custom base URL") + + def _force_rotate_to_untried_key(self, attempted_keys: set) -> bool: + """ + Force rotation to any key that hasn't been tried yet, ignoring cooldown + + Args: + attempted_keys: Set of key identifiers that have already been attempted + """ + if not self._multi_key_mode or not self._api_key_pool: # Use instance variable + return False + + old_key_identifier = self.key_identifier + + # Try each key in the pool + for i in range(len(self._api_key_pool.keys)): + key = self._api_key_pool.keys[i] + potential_key_id = f"Key#{i+1} ({key.model})" + + # Skip if already tried + if potential_key_id in attempted_keys: + continue + + # Found an untried key - use it regardless of cooldown + key_info = (key, i) + self._apply_key_change(key_info, old_key_identifier) + print(f"[DEBUG] 🔄 Force-rotated to untried key: {self.key_identifier}") + return True + + return False + + def get_current_key_info(self) -> str: + """Get information about the currently active key""" + if self._multi_key_mode and self.current_key_index is not None: # Use instance variable + key = self._api_key_pool.keys[self.current_key_index] + status = "Active" if key.is_available() else "Cooling Down" + return f"{self.key_identifier} - Status: {status}, Success: {key.success_count}, Errors: {key.error_count}" + else: + return "Single Key Mode" + + def _generate_unique_thread_dir(self, context: str) -> str: + """Generate a truly unique thread directory with session ID and timestamp""" + thread_name = threading.current_thread().name + thread_id = threading.current_thread().ident + + # Include timestamp and session ID for uniqueness + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:20] + unique_id = f"{thread_name}_{thread_id}_{self.session_id}_{timestamp}" + + thread_dir = os.path.join("Payloads", context, unique_id) + os.makedirs(thread_dir, exist_ok=True) + return thread_dir + + def _get_request_hash(self, messages) -> str: + """Generate a STABLE hash for request deduplication - THREAD-SAFE VERSION + WITH MICROSECOND LOCKING for thread safety.""" + + # MICROSECOND LOCK: Ensure atomic hash generation + with self._model_lock: + # Get thread-specific identifier to prevent cross-thread cache collisions + thread_id = threading.current_thread().ident + thread_name = threading.current_thread().name + + # REMOVED: request_uuid, request_timestamp, request_timestamp_micro + # We want STABLE hashes for caching to work! 
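+            # Illustrative sketch of the stable-hash property this method relies on
+            # (calls shown for explanation only):
+            #
+            #     h1 = client._get_request_hash(messages)
+            #     h2 = client._get_request_hash(messages)
+            #     assert h1 == h2   # identical request on the same thread -> identical hash
+            #
+            # Folding a per-call UUID or timestamp into hash_data would make h1 != h2
+            # and defeat duplicate detection, which is why those fields were removed.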
+ + # Create normalized representation (can be done outside lock) + normalized_messages = [] + + for msg in messages: + normalized_msg = { + 'role': msg.get('role', ''), + 'content': msg.get('content', '') + } + + # For image messages, include image size/hash instead of full data + if isinstance(normalized_msg['content'], list): + content_parts = [] + for part in normalized_msg['content']: + if isinstance(part, dict) and 'image_url' in part: + # Hash the image data + image_data = part.get('image_url', {}).get('url', '') + if image_data.startswith('data:'): + # Extract just the data part + image_hash = hashlib.md5(image_data.encode()).hexdigest() + content_parts.append(f"image:{image_hash}") + else: + content_parts.append(f"image_url:{image_data}") + else: + content_parts.append(str(part)) + normalized_msg['content'] = '|'.join(content_parts) + + normalized_messages.append(normalized_msg) + + # MICROSECOND LOCK: Ensure atomic hash generation + with self._model_lock: + # Include thread_id but NO request-specific IDs for stable caching + hash_data = { + 'thread_id': thread_id, # THREAD ISOLATION + 'thread_name': thread_name, # Additional context for debugging + # REMOVED: request_uuid, request_time, request_time_ns + 'messages': normalized_messages, + 'model': self.model, + 'temperature': getattr(self, 'temperature', 0.3), + 'max_tokens': getattr(self, 'max_tokens', 8192) + } + + # Debug logging if needed + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH] Thread: {thread_name} (ID: {thread_id})") + print(f"[HASH] Model: {self.model}") + + # Create stable JSON representation + hash_str = json.dumps(hash_data, sort_keys=True, ensure_ascii=False) + + # Use SHA256 for better distribution + final_hash = hashlib.sha256(hash_str.encode()).hexdigest() + + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH] Generated stable hash: {final_hash[:16]}...") + + return final_hash + + def _get_request_hash_with_context(self, messages, context=None) -> str: + """ + Generate a STABLE hash that includes context AND thread info for better deduplication. + WITH MICROSECOND LOCKING for thread safety. + """ + + # MICROSECOND LOCK: Ensure atomic reading of model/settings + with self._model_lock: + # Get thread-specific identifier + thread_id = threading.current_thread().ident + thread_name = threading.current_thread().name + + # REMOVED: request_uuid, request_timestamp, request_timestamp_micro + # We want STABLE hashes for caching to work! 
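+            # Hedged example of how `context` participates in the hash (illustrative only):
+            #
+            #     t = client._get_request_hash_with_context(messages, context='translation')
+            #     g = client._get_request_hash_with_context(messages, context='glossary')
+            #     assert t != g   # same messages, different purpose -> separate cache entries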
+ + # Create normalized representation (can be done outside lock) + normalized_messages = [] + + for msg in messages: + normalized_msg = { + 'role': msg.get('role', ''), + 'content': msg.get('content', '') + } + + # Handle image messages + if isinstance(normalized_msg['content'], list): + content_parts = [] + for part in normalized_msg['content']: + if isinstance(part, dict) and 'image_url' in part: + image_data = part.get('image_url', {}).get('url', '') + if image_data.startswith('data:'): + # Use first 1000 chars of image data for hash + image_sample = image_data[:1000] + image_hash = hashlib.md5(image_sample.encode()).hexdigest() + content_parts.append(f"image:{image_hash}") + else: + content_parts.append(f"image_url:{image_data}") + elif isinstance(part, dict): + content_parts.append(json.dumps(part, sort_keys=True)) + else: + content_parts.append(str(part)) + normalized_msg['content'] = '|'.join(content_parts) + + normalized_messages.append(normalized_msg) + + # MICROSECOND LOCK: Ensure atomic hash generation + with self._instance_model_lock: + # Include context, thread info, but NO request-specific IDs + hash_data = { + 'thread_id': thread_id, # THREAD ISOLATION + 'thread_name': thread_name, # Additional thread context + # REMOVED: request_uuid, request_time, request_time_ns + 'context': context, # Include context (e.g., 'translation', 'glossary', etc.) + 'messages': normalized_messages, + 'model': self.model, + 'temperature': getattr(self, 'temperature', 0.3), + 'max_tokens': getattr(self, 'max_tokens', 8192) + } + + # Debug logging if needed + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH_CONTEXT] Thread: {thread_name} (ID: {thread_id})") + print(f"[HASH_CONTEXT] Context: {context}") + print(f"[HASH_CONTEXT] Model: {self.model}") + + # Create stable JSON representation + hash_str = json.dumps(hash_data, sort_keys=True, ensure_ascii=False) + + # Use SHA256 for better distribution + final_hash = hashlib.sha256(hash_str.encode()).hexdigest() + + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH_CONTEXT] Generated stable hash: {final_hash[:16]}...") + + return final_hash + + def _get_unique_file_suffix(self, attempt: int = 0) -> str: + """Generate a unique suffix for file names to prevent overwrites + WITH MICROSECOND LOCKING for thread safety.""" + + # MICROSECOND LOCK: Ensure atomic generation of unique identifiers + with self._instance_model_lock: + thread_id = threading.current_thread().ident + timestamp = datetime.now().strftime("%H%M%S%f")[:10] + request_uuid = str(uuid.uuid4())[:8] + + # Create unique suffix for files + suffix = f"_T{thread_id}_A{attempt}_{timestamp}_{request_uuid}" + + return suffix + + def _get_request_hash_with_request_id(self, messages, request_id: str) -> str: + """Generate hash WITH request ID for per-call caching + WITH MICROSECOND LOCKING for thread safety.""" + + # MICROSECOND LOCK: Ensure atomic hash generation + with self._instance_model_lock: + thread_id = threading.current_thread().ident + thread_name = threading.current_thread().name + + # Create normalized representation + normalized_messages = [] + + for msg in messages: + normalized_msg = { + 'role': msg.get('role', ''), + 'content': msg.get('content', '') + } + + # For image messages, include image size/hash instead of full data + if isinstance(normalized_msg['content'], list): + content_parts = [] + for part in normalized_msg['content']: + if isinstance(part, dict) and 'image_url' in part: + image_data = part.get('image_url', {}).get('url', '') + if 
image_data.startswith('data:'): + image_hash = hashlib.md5(image_data.encode()).hexdigest() + content_parts.append(f"image:{image_hash}") + else: + content_parts.append(f"image_url:{image_data}") + else: + content_parts.append(str(part)) + normalized_msg['content'] = '|'.join(content_parts) + + normalized_messages.append(normalized_msg) + + # MICROSECOND LOCK: Ensure atomic hash generation + with self._instance_model_lock: + hash_data = { + 'thread_id': thread_id, + 'thread_name': thread_name, + 'request_id': request_id, # THIS MAKES EACH send() CALL UNIQUE + 'messages': normalized_messages, + 'model': self.model, + 'temperature': getattr(self, 'temperature', 0.3), + 'max_tokens': getattr(self, 'max_tokens', 8192) + } + + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH] Thread: {thread_name} (ID: {thread_id})") + print(f"[HASH] Request ID: {request_id}") # Debug the request ID + print(f"[HASH] Model: {self.model}") + + hash_str = json.dumps(hash_data, sort_keys=True, ensure_ascii=False) + final_hash = hashlib.sha256(hash_str.encode()).hexdigest() + + if os.getenv("DEBUG_HASH", "0") == "1": + print(f"[HASH] Generated hash for request {request_id}: {final_hash[:16]}...") + + return final_hash + + def _check_duplicate_request(self, request_hash: str, context: str) -> bool: + """ + Enhanced duplicate detection that properly handles parallel requests. + Returns True only if this exact request is actively being processed. + """ + + # Only check for duplicates in specific contexts + if context not in ['translation', 'glossary', 'image_translation']: + return False + + thread_name = threading.current_thread().name + + # This method is now deprecated in favor of the active_requests tracking + # We keep it for backward compatibility but it just returns False + # The real duplicate detection happens in the send() method using _active_requests + return False + + def _debug_active_requests(self): + """Debug method to show current active requests""" + pass + + def _ensure_thread_safety_init(self): + """ + Ensure all thread safety structures are properly initialized. + Call this during __init__ or before parallel processing. + """ + + # Thread-local storage + if not hasattr(self, '_thread_local'): + self._thread_local = threading.local() + + # File operation locks + if not hasattr(self, '_file_write_locks'): + self._file_write_locks = {} + if not hasattr(self, '_file_write_locks_lock'): + self._file_write_locks_lock = RLock() + + # Legacy tracker (for backward compatibility) + if not hasattr(self, '_tracker_lock'): + self._tracker_lock = RLock() + + def _periodic_cache_cleanup(self): + """ + Periodically clean up expired cache entries and active requests. + Should be called periodically or scheduled with a timer. + """ + pass + + def _get_thread_status(self) -> dict: + """ + Get current status of thread-related structures for debugging. 
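+        Example of the returned structure (values are illustrative):
+
+            {
+                'thread_name': 'Thread-3',
+                'thread_id': 140213,
+                'cache_size': 12,
+                'active_requests': 2,
+                'multi_key_mode': True,
+                'current_key': 'Key#2 (gpt-4o)',
+                'thread_local': {'initialized': True, 'key_index': 1, 'request_count': 4}
+            }
+        """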
+ """ + status = { + 'thread_name': threading.current_thread().name, + 'thread_id': threading.current_thread().ident, + 'cache_size': len(self._request_cache) if hasattr(self, '_request_cache') else 0, + 'active_requests': len(self._active_requests) if hasattr(self, '_active_requests') else 0, + 'multi_key_mode': self._multi_key_mode, + 'current_key': self.key_identifier if hasattr(self, 'key_identifier') else 'Unknown' + } + + # Add thread-local info if available + if hasattr(self, '_thread_local'): + tls = self._get_thread_local_client() + status['thread_local'] = { + 'initialized': getattr(tls, 'initialized', False), + 'key_index': getattr(tls, 'key_index', None), + 'request_count': getattr(tls, 'request_count', 0) + } + + return status + + def cleanup(self): + """ + Enhanced cleanup method to properly release all resources. + Should be called when done with the client or on shutdown. + """ + thread_name = threading.current_thread().name + logger.info(f"[{thread_name}] Cleaning up UnifiedClient resources") + + # Release thread key assignment if in multi-key mode + if self._multi_key_mode and self._api_key_pool: + thread_id = threading.current_thread().ident + self._api_key_pool.release_thread_assignment(thread_id) + + # Clear thread-local storage + if hasattr(self, '_thread_local'): + # Reset thread-local state + self._thread_local.initialized = False + self._thread_local.api_key = None + self._thread_local.model = None + self._thread_local.key_index = None + self._thread_local.request_count = 0 + + logger.info(f"[{thread_name}] Cleanup complete") + + def _get_safe_filename(self, base_filename: str, content_hash: str = None) -> str: + """Generate a safe, unique filename""" + # Add content hash if provided + if content_hash: + name, ext = os.path.splitext(base_filename) + return f"{name}_{content_hash[:8]}{ext}" + return base_filename + + def _is_file_being_written(self, filepath: str) -> bool: + """Check if a file is currently being written by another thread""" + with self._file_lock: + return filepath in self._active_files + + def _mark_file_active(self, filepath: str): + """Mark a file as being written""" + with self._file_lock: + self._active_files.add(filepath) + + def _mark_file_complete(self, filepath: str): + """Mark a file write as complete""" + with self._file_lock: + self._active_files.discard(filepath) + + def _extract_chapter_info(self, messages) -> dict: + """Extract chapter and chunk information from messages and progress file + + Args: + messages: The messages to search for chapter/chunk info + + Returns: + dict with 'chapter', 'chunk', 'total_chunks' + """ + info = { + 'chapter': None, + 'chunk': None, + 'total_chunks': None + } + + messages_str = str(messages) + + # First extract chapter number from messages + chapter_match = re.search(r'Chapter\s+(\d+)', messages_str, re.IGNORECASE) + if not chapter_match: + # Try Section pattern for text files + chapter_match = re.search(r'Section\s+(\d+)', messages_str, re.IGNORECASE) + + if chapter_match: + chapter_num = int(chapter_match.group(1)) + info['chapter'] = str(chapter_num) + + # Now try to get more accurate info from progress file + # Look for translation_progress.json in common locations + possible_paths = [ + 'translation_progress.json', + os.path.join('Payloads', 'translation_progress.json'), + os.path.join(os.getcwd(), 'Payloads', 'translation_progress.json') + ] + + # Check environment variable for output directory + output_dir = os.getenv('OUTPUT_DIRECTORY', '') + if output_dir: + possible_paths.insert(0, 
os.path.join(output_dir, 'translation_progress.json')) + + progress_file = None + for path in possible_paths: + if os.path.exists(path): + progress_file = path + break + + if progress_file: + try: + with open(progress_file, 'r', encoding='utf-8') as f: + prog = json.load(f) + + # Look through chapters for matching actual_num + for chapter_key, chapter_info in prog.get("chapters", {}).items(): + if chapter_info.get('actual_num') == chapter_num: + # Found it! Get chunk info if available + if chapter_key in prog.get("chapter_chunks", {}): + chunk_data = prog["chapter_chunks"][chapter_key] + info['total_chunks'] = chunk_data.get('total') + + # Get current/latest chunk + completed = chunk_data.get('completed', []) + if completed: + info['chunk'] = str(max(completed) + 1) # Next chunk to process + else: + info['chunk'] = '1' # First chunk + break + except: + pass # Fallback to regex parsing + + # If we didn't get chunk info from progress file, try regex + if not info['chunk']: + chunk_match = re.search(r'Chunk\s+(\d+)/(\d+)', messages_str) + if chunk_match: + info['chunk'] = chunk_match.group(1) + info['total_chunks'] = chunk_match.group(2) + + return info + + + def get_current_key_info(self) -> str: + """Get information about the currently active key""" + if self.use_multi_keys and self.current_key_index is not None: + key = self._api_key_pool.keys[self.current_key_index] + status = "Active" if key.is_available() else "Cooling Down" + return f"{self.key_identifier} - Status: {status}, Success: {key.success_count}, Errors: {key.error_count}" + else: + return "Single Key Mode" + + def _should_rotate(self) -> bool: + """Check if we should rotate keys based on settings""" + if not self.use_multi_keys: + return False + + if not self._force_rotation: + # Only rotate on errors + return False + + # Check frequency + with self._counter_lock: + self._request_counter += 1 + + # Check if it's time to rotate + if self._request_counter >= self._rotation_frequency: + self._request_counter = 0 + return True + else: + return False + + def _get_shortest_cooldown_time(self) -> int: + """Get the shortest cooldown time among all keys""" + # Check if cancelled at start + if self._cancelled: + return 0 # Return immediately if cancelled + + if not self._multi_key_mode or not self.__class__._api_key_pool: + return 60 # Default cooldown + + min_cooldown = float('inf') + now = time.time() + + for i, key in enumerate(self.__class__._api_key_pool.keys): + if key.enabled: + key_id = f"Key#{i+1} ({key.model})" + + # Check rate limit cache + cache_cooldown = self.__class__._rate_limit_cache.get_remaining_cooldown(key_id) + if cache_cooldown > 0: + min_cooldown = min(min_cooldown, cache_cooldown) + + # Also check key's own cooldown + if key.is_cooling_down and key.last_error_time: + remaining = key.cooldown - (now - key.last_error_time) + if remaining > 0: + min_cooldown = min(min_cooldown, remaining) + + # Add random jitter to prevent thundering herd (0-5 seconds) + jitter = random.randint(0, 5) + + # Return the minimum wait time plus jitter, capped at 60 seconds + base_time = int(min_cooldown) if min_cooldown != float('inf') else 30 + return min(base_time + jitter, 60) + + def _execute_with_retry(self, + perform, + messages, + temperature, + max_tokens, + max_completion_tokens, + context, + request_id, + is_image: bool = False) -> Tuple[str, Optional[str]]: + """ + Simplified shim retained for compatibility. Executes once without internal retry. 
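+        `perform` is expected to accept
+        (messages, temperature, max_tokens, max_completion_tokens, context, retry_reason, request_id)
+        and to return a (content, finish_reason) tuple; any other result raises UnifiedClientError.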
+ """ + result = perform(messages, temperature, max_tokens, max_completion_tokens, context, None, request_id) + if not result or not isinstance(result, tuple): + raise UnifiedClientError("Invalid result from perform()", error_type="unexpected") + return result + def _send_core(self, + messages, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + max_completion_tokens: Optional[int] = None, + context: Optional[str] = None, + image_data: Any = None) -> Tuple[str, Optional[str]]: + """ + Unified front for send and send_image. Includes multi-key retry wrapper. + """ + batch_mode = os.getenv("BATCH_TRANSLATION", "0") == "1" + if not batch_mode: + self._sequential_send_lock.acquire() + try: + self.reset_cleanup_state() + # Pre-stagger log so users see what's being sent before delay + self._log_pre_stagger(messages, context or ('image_translation' if image_data else 'translation')) + self._apply_thread_submission_delay() + request_id = str(uuid.uuid4())[:8] + + # Multi-key retry wrapper + if self._multi_key_mode: + # Check if indefinite retry is enabled for multi-key mode too + indefinite_retry_enabled = os.getenv("INDEFINITE_RATE_LIMIT_RETRY", "1") == "1" + last_error = None + attempt = 0 + + while True: # Indefinite retry loop when enabled + try: + if image_data is None: + return self._send_internal(messages, temperature, max_tokens, max_completion_tokens, context, retry_reason=None, request_id=request_id) + else: + return self._send_image_internal(messages, image_data, temperature, max_tokens, max_completion_tokens, context, retry_reason=None, request_id=request_id) + + except UnifiedClientError as e: + last_error = e + + # Handle rate limit errors with key rotation + if e.error_type == "rate_limit" or self._is_rate_limit_error(e): + attempt += 1 + + if indefinite_retry_enabled: + print(f"🔄 Multi-key mode: Rate limit hit, attempting key rotation (indefinite retry, attempt {attempt})") + else: + # Limited retry mode - respect max attempts per key + num_keys = len(self._api_key_pool.keys) if self._api_key_pool else 3 + max_attempts = num_keys * 2 # Allow 2 attempts per key + print(f"🔄 Multi-key mode: Rate limit hit, attempting key rotation (attempt {attempt}/{max_attempts})") + + if attempt >= max_attempts: + print(f"❌ Multi-key mode: Exhausted {max_attempts} attempts, giving up") + raise + + try: + # Rotate to next key + self._handle_rate_limit_for_thread() + print(f"🔄 Multi-key retry: Key rotation completed, preparing for next attempt...") + time.sleep(0.1) # Brief pause after key rotation for system stability + + # Check if we have any available keys left after rotation + available_keys = self._count_available_keys() + if available_keys == 0: + print(f"🔄 Multi-key mode: All keys rate-limited, waiting for cooldown...") + # Wait a bit before trying again + wait_time = min(60 + random.uniform(1, 10), 120) # 60-70 seconds + print(f"🔄 Multi-key mode: Waiting {wait_time:.1f}s for keys to cool down") + + wait_start = time.time() + while time.time() - wait_start < wait_time: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + + # Continue to next attempt with rotated key + continue + + except Exception as rotation_error: + print(f"❌ Multi-key mode: Key rotation failed: {rotation_error}") + # If rotation fails, we can't continue with multi-key retry + if indefinite_retry_enabled: + # In indefinite mode, try to continue with any available key + print(f"🔄 Multi-key mode: Key rotation failed, but indefinite 
retry enabled - continuing...") + time.sleep(5) # Brief pause before trying again + continue + else: + break + else: + # Non-rate-limit error, don't retry with different keys + raise + + # This point is only reached in non-indefinite mode when giving up + if last_error: + print(f"❌ Multi-key mode: All retry attempts failed") + raise last_error + else: + raise UnifiedClientError("All multi-key attempts failed", error_type="no_keys") + else: + # Single key mode - direct call + if image_data is None: + return self._send_internal(messages, temperature, max_tokens, max_completion_tokens, context, retry_reason=None, request_id=request_id) + else: + return self._send_image_internal(messages, image_data, temperature, max_tokens, max_completion_tokens, context, retry_reason=None, request_id=request_id) + finally: + if not batch_mode: + self._sequential_send_lock.release() + + def _get_thread_assigned_key(self) -> Optional[int]: + """Get the key index assigned to current thread""" + thread_id = threading.current_thread().ident + + with self._key_assignment_lock: + if thread_id in self._thread_key_assignments: + key_index, timestamp = self._thread_key_assignments[thread_id] + # Check if assignment is still valid (not expired) + if time.time() - timestamp < 300: # 5 minute expiry + return key_index + else: + # Expired, remove it + del self._thread_key_assignments[thread_id] + + return None + + def _assign_key_to_thread(self, key_index: int): + """Assign a key to the current thread""" + thread_id = threading.current_thread().ident + + with self._key_assignment_lock: + self._thread_key_assignments[thread_id] = (key_index, time.time()) + + # Cleanup old assignments + current_time = time.time() + expired_threads = [ + tid for tid, (_, ts) in self._thread_key_assignments.items() + if current_time - ts > 300 + ] + for tid in expired_threads: + del self._thread_key_assignments[tid] + + + def _setup_client(self): + """Setup the appropriate client based on model type""" + model_lower = self.model.lower() + tls = self._get_thread_local_client() + + # Determine client_type (no lock needed, just reading) + self.client_type = None + for prefix, provider in self.MODEL_PROVIDERS.items(): + if model_lower.startswith(prefix): + self.client_type = provider + break + + # Check if we're using a custom OpenAI base URL + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', os.getenv('OPENAI_API_BASE', '')) + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + + # Apply custom endpoint logic when enabled - override any model type (except Gemini which has its own toggle) + if custom_base_url and custom_base_url != 'https://api.openai.com/v1' and use_custom_endpoint: + if not self.client_type: + # No prefix matched - assume it's a custom model that should use OpenAI endpoint + self.client_type = 'openai' + logger.info(f"Using OpenAI client for custom endpoint with unmatched model: {self.model}") + elif self.client_type == 'openai': + logger.info(f"Using custom OpenAI endpoint for OpenAI model: {self.model}") + elif self.client_type == 'gemini': + # Don't override Gemini - it has its own separate endpoint toggle + # Only log if Gemini OpenAI endpoint is not also enabled + use_gemini_endpoint = os.getenv("USE_GEMINI_OPENAI_ENDPOINT", "0") == "1" + if not use_gemini_endpoint: + self._log_once(f"Gemini model detected, not overriding with custom OpenAI endpoint (use USE_GEMINI_OPENAI_ENDPOINT instead)") + else: + # Override other model types to use custom OpenAI endpoint when toggle is enabled + 
original_client_type = self.client_type + self.client_type = 'openai' + print(f"[DEBUG] Custom endpoint override: {original_client_type} -> openai for model '{self.model}'") + logger.info(f"Custom endpoint enabled: Overriding {original_client_type} model {self.model} to use OpenAI client") + elif not use_custom_endpoint and custom_base_url and self.client_type == 'openai': + logger.info("Custom OpenAI endpoint disabled via toggle, using default endpoint") + + # If still no client type, show error with suggestions + if not self.client_type: + # Provide helpful suggestions + suggestions = [] + for prefix in self.MODEL_PROVIDERS.keys(): + if prefix in model_lower or model_lower[:3] in prefix: + suggestions.append(prefix) + + error_msg = f"Unsupported model: {self.model}. " + if suggestions: + error_msg += f"Did you mean to use one of these prefixes? {suggestions}. " + else: + # Check if it might be an aggregator model + if any(provider in model_lower for provider in ['yi', 'qwen', 'llama', 'gpt', 'claude']): + error_msg += f"If using ElectronHub, prefix with 'eh/' (e.g., eh/{self.model}). " + error_msg += f"If using OpenRouter, prefix with 'or/' (e.g., or/{self.model}). " + error_msg += f"If using Poe, prefix with 'poe/' (e.g., poe/{self.model}). " + error_msg += f"Supported prefixes: {list(self.MODEL_PROVIDERS.keys())}" + raise ValueError(error_msg) + + # Initialize variables at method scope for all client types + base_url = None + use_gemini_endpoint = False + gemini_endpoint = "" + + # Prepare provider-specific settings (but don't create clients yet) + if self.client_type == 'openai': + if openai is None: + raise ImportError("OpenAI library not installed. Install with: pip install openai") + + elif self.client_type == 'gemini': + # Check if we should use OpenAI-compatible endpoint for Gemini + use_gemini_endpoint = os.getenv("USE_GEMINI_OPENAI_ENDPOINT", "0") == "1" + gemini_endpoint = os.getenv("GEMINI_OPENAI_ENDPOINT", "") + + if use_gemini_endpoint and gemini_endpoint: + # Use OpenAI client for Gemini with custom endpoint + #print(f"[DEBUG] Preparing Gemini with OpenAI-compatible endpoint") + pass + if openai is None: + raise ImportError("OpenAI library not installed. Install with: pip install openai") + + # Ensure endpoint has proper format + if not gemini_endpoint.endswith('/openai/'): + if gemini_endpoint.endswith('/'): + gemini_endpoint = gemini_endpoint + 'openai/' + else: + gemini_endpoint = gemini_endpoint + '/openai/' + + # Set base_url for Gemini OpenAI endpoint + base_url = gemini_endpoint + + print(f"[DEBUG] Gemini will use OpenAI-compatible endpoint: {gemini_endpoint}") + + disable_safety = os.getenv("DISABLE_GEMINI_SAFETY", "false").lower() == "true" + + config_data = { + "type": "GEMINI_OPENAI_ENDPOINT_REQUEST", + "model": self.model, + "endpoint": gemini_endpoint, + "safety_enabled": not disable_safety, + "safety_settings": "DISABLED_VIA_OPENAI_ENDPOINT" if disable_safety else "DEFAULT", + "timestamp": datetime.now().isoformat(), + } + + # Just call the existing save method + self._save_gemini_safety_config(config_data, None) + else: + # Use native Gemini client + #print(f"[DEBUG] Preparing native Gemini client") + if not GENAI_AVAILABLE: + raise ImportError( + "Google Gen AI library not installed. 
Install with: " + "pip install google-genai" + ) + + elif self.client_type == 'electronhub': + # ElectronHub uses OpenAI SDK if available + if openai is not None: + logger.info("ElectronHub will use OpenAI SDK for API calls") + else: + logger.info("ElectronHub will use HTTP API for API calls") + + elif self.client_type == 'chutes': + # chutes uses OpenAI-compatible endpoint + if openai is not None: + chutes_base_url = os.getenv("CHUTES_API_URL", "https://llm.chutes.ai/v1") + + # MICROSECOND LOCK for chutes client + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=chutes_base_url + ) + logger.info(f"chutes client configured with endpoint: {chutes_base_url}") + else: + logger.info("chutes will use HTTP API") + + elif self.client_type == 'mistral': + if MistralClient is None: + # Fall back to HTTP API if SDK not installed + logger.info("Mistral SDK not installed, will use HTTP API") + + elif self.client_type == 'cohere': + if cohere is None: + logger.info("Cohere SDK not installed, will use HTTP API") + + elif self.client_type == 'anthropic': + if anthropic is None: + logger.info("Anthropic SDK not installed, will use HTTP API") + else: + # Store API key for HTTP fallback + self.anthropic_api_key = self.api_key + logger.info("Anthropic client configured") + + elif self.client_type == 'deepseek': + # DeepSeek typically uses OpenAI-compatible endpoint + if openai is None: + logger.info("DeepSeek will use HTTP API") + else: + base_url = os.getenv("DEEPSEEK_API_URL", "https://api.deepseek.com/v1") + logger.info(f"DeepSeek will use endpoint: {base_url}") + + elif self.client_type == 'groq': + # Groq uses OpenAI-compatible endpoint + if openai is None: + logger.info("Groq will use HTTP API") + else: + base_url = os.getenv("GROQ_API_URL", "https://api.groq.com/openai/v1") + logger.info(f"Groq will use endpoint: {base_url}") + + elif self.client_type == 'fireworks': + # Fireworks uses OpenAI-compatible endpoint + if openai is None: + logger.info("Fireworks will use HTTP API") + else: + base_url = os.getenv("FIREWORKS_API_URL", "https://api.fireworks.ai/inference/v1") + logger.info(f"Fireworks will use endpoint: {base_url}") + + elif self.client_type == 'xai': + # xAI (Grok) uses OpenAI-compatible endpoint + if openai is None: + logger.info("xAI will use HTTP API") + else: + base_url = os.getenv("XAI_API_URL", "https://api.x.ai/v1") + logger.info(f"xAI will use endpoint: {base_url}") + + # ===================================================== + # MICROSECOND LOCK: Create ALL clients with thread safety + # ===================================================== + + if self.client_type == 'openai': + # Skip if individual endpoint already applied + if hasattr(self, '_individual_endpoint_applied') and self._individual_endpoint_applied: + return + + # MICROSECOND LOCK for OpenAI client + with self._model_lock: + # Use regular OpenAI client - individual endpoint will be set later + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url='https://api.openai.com/v1' # Default, will be overridden by individual endpoint + ) + + elif self.client_type == 'gemini': + if use_gemini_endpoint and gemini_endpoint: + # Use OpenAI client for Gemini endpoint + if base_url is None: + base_url = gemini_endpoint + + # MICROSECOND LOCK for Gemini with OpenAI endpoint + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=base_url + ) + self._original_client_type = 'gemini' + self.client_type = 'openai' + print(f"[DEBUG] 
Gemini using OpenAI-compatible endpoint: {base_url}") + else: + # MICROSECOND LOCK for native Gemini client + # Check if this key has Google credentials (multi-key mode) + google_creds = None + if hasattr(self, 'current_key_google_creds') and self.current_key_google_creds: + google_creds = self.current_key_google_creds + print(f"[DEBUG] Using key-specific Google credentials: {os.path.basename(google_creds)}") + # Set environment variable for this request + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_creds + elif hasattr(self, 'google_creds_path') and self.google_creds_path: + google_creds = self.google_creds_path + print(f"[DEBUG] Using default Google credentials: {os.path.basename(google_creds)}") + + with self._model_lock: + self.gemini_client = genai.Client(api_key=self.api_key) + if hasattr(tls, 'model'): + tls.gemini_configured = True + tls.gemini_api_key = self.api_key + tls.gemini_client = self.gemini_client + + #print(f"[DEBUG] Created native Gemini client for model: {self.model}") + + elif self.client_type == 'mistral': + if MistralClient is not None: + # MICROSECOND LOCK for Mistral client + if hasattr(self, '_instance_model_lock'): + with self._instance_model_lock: + self.mistral_client = MistralClient(api_key=self.api_key) + else: + self.mistral_client = MistralClient(api_key=self.api_key) + logger.info("Mistral client created") + + elif self.client_type == 'cohere': + if cohere is not None: + # MICROSECOND LOCK for Cohere client + with self._model_lock: + self.cohere_client = cohere.Client(self.api_key) + logger.info("Cohere client created") + + elif self.client_type == 'deepseek': + if openai is not None: + if base_url is None: + base_url = os.getenv("DEEPSEEK_API_URL", "https://api.deepseek.com/v1") + + # MICROSECOND LOCK for DeepSeek client + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=base_url + ) + logger.info(f"DeepSeek client configured with endpoint: {base_url}") + + elif self.client_type == 'groq': + if openai is not None: + if base_url is None: + base_url = os.getenv("GROQ_API_URL", "https://api.groq.com/openai/v1") + + # MICROSECOND LOCK for Groq client + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=base_url + ) + logger.info(f"Groq client configured with endpoint: {base_url}") + + elif self.client_type == 'fireworks': + if openai is not None: + if base_url is None: + base_url = os.getenv("FIREWORKS_API_URL", "https://api.fireworks.ai/inference/v1") + + # MICROSECOND LOCK for Fireworks client + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=base_url + ) + logger.info(f"Fireworks client configured with endpoint: {base_url}") + + elif self.client_type == 'xai': + if openai is not None: + if base_url is None: + base_url = os.getenv("XAI_API_URL", "https://api.x.ai/v1") + + # MICROSECOND LOCK for xAI client + with self._model_lock: + self.openai_client = openai.OpenAI( + api_key=self.api_key, + base_url=base_url + ) + logger.info(f"xAI client configured with endpoint: {base_url}") + + elif self.client_type == 'deepl' or self.model.startswith('deepl'): + self.client_type = 'deepl' + self.client = None # No persistent client needed + return + + elif self.client_type == 'google_translate' or self.model.startswith('google-translate'): + self.client_type = 'google_translate' + self.client = None # No persistent client needed + return + + elif self.client_type == 'vertex_model_garden': + # Vertex AI doesn't need a 
client created here + logger.info("Vertex AI Model Garden will initialize on demand") + + elif self.client_type in ['yi', 'qwen', 'baichuan', 'zhipu', 'moonshot', 'baidu', + 'tencent', 'iflytek', 'bytedance', 'minimax', + 'sensenova', 'internlm', 'tii', 'microsoft', + 'azure', 'google', 'alephalpha', 'databricks', + 'huggingface', 'salesforce', 'bigscience', 'meta', + 'electronhub', 'poe', 'openrouter', 'chutes']: + # These providers will use HTTP API or OpenAI-compatible endpoints + # No client initialization needed here + logger.info(f"{self.client_type} will use HTTP API or compatible endpoint") + + # Store thread-local client reference if in multi-key mode + if self._multi_key_mode and hasattr(tls, 'model'): + # MICROSECOND LOCK for thread-local storage + with self._model_lock: + tls.client_type = self.client_type + if hasattr(self, 'openai_client'): + tls.openai_client = self.openai_client + if hasattr(self, 'gemini_client'): + tls.gemini_client = self.gemini_client + if hasattr(self, 'mistral_client'): + tls.mistral_client = self.mistral_client + if hasattr(self, 'cohere_client'): + tls.cohere_client = self.cohere_client + else: + tls.client_type = self.client_type + if hasattr(self, 'openai_client'): + tls.openai_client = self.openai_client + if hasattr(self, 'gemini_client'): + tls.gemini_client = self.gemini_client + if hasattr(self, 'mistral_client'): + tls.mistral_client = self.mistral_client + if hasattr(self, 'cohere_client'): + tls.cohere_client = self.cohere_client + + # Log retry feature support + logger.info(f"✅ Initialized {self.client_type} client for model: {self.model}") + logger.debug("✅ GUI retry features supported: truncation detection, timeout handling, duplicate detection") + + def send(self, messages, temperature=None, max_tokens=None, + max_completion_tokens=None, context=None) -> Tuple[str, Optional[str]]: + """Backwards-compatible public API; now delegates to unified _send_core.""" + return self._send_core(messages, temperature, max_tokens, max_completion_tokens, context, image_data=None) + + def _send_internal(self, messages, temperature=None, max_tokens=None, + max_completion_tokens=None, context=None, retry_reason=None, + request_id=None, image_data=None) -> Tuple[str, Optional[str]]: + """ + Unified internal send implementation for both text and image requests. + Pass image_data=None for text requests, or image bytes/base64 for image requests. 
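+        Returns a (content, finish_reason) tuple. When `context` is omitted it defaults to
+        'image_translation' for image requests and 'translation' for text requests.
+
+        Illustrative internal call (normally reached via send()/send_image()):
+
+            text, finish_reason = self._send_internal(messages, temperature=0.3,
+                                                      max_tokens=8192, context='translation')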
+ """ + # Determine if this is an image request + is_image_request = image_data is not None + + # Use appropriate context default + if context is None: + context = 'image_translation' if is_image_request else 'translation' + + # Always ensure per-request key assignment/rotation for multi-key mode + # This guarantees forced rotation when rotation_frequency == 1 + if getattr(self, '_multi_key_mode', False): + try: + self._ensure_thread_client() + except UnifiedClientError: + # Propagate known client errors + raise + except Exception as e: + # Normalize unexpected failures + raise UnifiedClientError(f"Failed to acquire API key for thread: {e}", error_type="no_keys") + + # Handle refactored mode with disabled internal retry + if getattr(self, '_disable_internal_retry', False): + t0 = time.time() + + # For image requests, prepare messages with embedded image + if image_data: + messages = self._prepare_image_messages(messages, image_data) + + # Validate request + valid, error_msg = self._validate_request(messages, max_tokens) + if not valid: + raise UnifiedClientError(f"Invalid request: {error_msg}", error_type="validation") + + # File names and payload save + payload_name, response_name = self._get_file_names(messages, context=self.context or context) + self._save_payload(messages, payload_name, retry_reason=retry_reason) + + # Get response via provider router + response = self._get_response(messages, temperature, max_tokens, max_completion_tokens, response_name) + + # Extract text uniformly + extracted_content, finish_reason = self._extract_response_text(response, provider=getattr(self, 'client_type', 'unknown')) + + # Save response if any + if extracted_content: + self._save_response(extracted_content, response_name) + + # Stats and success mark + self._track_stats(context, True, None, time.time() - t0) + self._mark_key_success() + + # API delay between calls (respects GUI setting) + self._apply_api_delay() + + return extracted_content, finish_reason + + # Main implementation with retry logic + start_time = time.time() + + # Generate request hash WITH request ID if provided + if image_data: + image_size = len(image_data) if isinstance(image_data, (bytes, str)) else 0 + if request_id: + messages_hash = self._get_request_hash_with_request_id(messages, request_id) + else: + request_id = str(uuid.uuid4())[:8] + messages_hash = self._get_request_hash_with_request_id(messages, request_id) + request_hash = f"{messages_hash}_img{image_size}" + else: + if request_id: + request_hash = self._get_request_hash_with_request_id(messages, request_id) + else: + request_id = str(uuid.uuid4())[:8] + request_hash = self._get_request_hash_with_request_id(messages, request_id) + + thread_name = threading.current_thread().name + + # Log with hash for tracking + logger.debug(f" Request ID: {request_id}") + logger.debug(f" Hash: {request_hash[:8]}...") + logger.debug(f" Retry reason: {retry_reason}") + + # Log with hash for tracking + logger.debug(f"[{thread_name}] _send_internal starting for {context} (hash: {request_hash[:8]}...) 
retry_reason: {retry_reason}") + + # Reset cancelled flag + self._cancelled = False + + # Reset counters when context changes + if context != self.current_session_context: + self.reset_conversation_for_new_context(context) + + self.context = context or 'translation' + self.conversation_message_count += 1 + + # Internal retry logic for 500 errors - now optionally disabled (centralized retry handles it) + internal_retries = self._get_max_retries() + base_delay = 5 # Base delay for exponential backoff + + # Track if we've tried main key for prohibited content + main_key_attempted = False + + # Initialize variables that might be referenced in exception handlers + extracted_content = "" + finish_reason = 'error' + + # Track whether we already attempted a Gemma/OpenRouter system->user retry + gemma_no_system_retry_done = False + + for attempt in range(internal_retries): + try: + # Validate request + valid, error_msg = self._validate_request(messages, max_tokens) + if not valid: + raise UnifiedClientError(f"Invalid request: {error_msg}", error_type="validation") + + os.makedirs("Payloads", exist_ok=True) + + # Apply reinforcement + messages = self._apply_pure_reinforcement(messages) + + # For image requests, prepare messages with embedded image + if image_data: + messages = self._prepare_image_messages(messages, image_data) + + # Get file names - now unique per request AND attempt + payload_name, response_name = self._get_file_names(messages, context=self.context) + + # Add request ID and attempt to filename for complete isolation + payload_name, response_name = self._with_attempt_suffix(payload_name, response_name, request_id, attempt, is_image=bool(image_data)) + + # Save payload with retry reason + # On internal retries (500 errors), add that info too + if attempt > 0: + internal_retry_reason = f"500_error_attempt_{attempt}" + if retry_reason: + combined_reason = f"{retry_reason}_{internal_retry_reason}" + else: + combined_reason = internal_retry_reason + self._save_payload(messages, payload_name, retry_reason=combined_reason) + else: + self._save_payload(messages, payload_name, retry_reason=retry_reason) + + # FIX: Define payload_messages BEFORE using it + # Create a sanitized version for payload (without actual image data) + payload_messages = [ + {**msg, 'content': 'IMAGE_DATA_OMITTED' if isinstance(msg.get('content'), list) else msg.get('content')} + for msg in messages + ] + + # Now save the payload (payload_messages is now defined) + #self._save_payload(payload_messages, payload_name) + + + # Set idempotency context for downstream calls + self._set_idempotency_context(request_id, attempt) + + # Unified provider dispatch: for image requests, messages already embed the image. + # Route via the same _get_response used for text; Gemini handler internally detects images. 
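+                # Note: the router may hand back either a UnifiedResponse or a raw provider
+                # SDK object; _extract_response_text() below normalizes both shapes, so
+                # nothing at this point should assume a specific response structure.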
+ response = self._get_response(messages, temperature, max_tokens, max_completion_tokens, response_name) + + # Check for cancellation (from timeout or stop button) + if self._cancelled: + if not self._is_stop_requested(): + logger.info("Operation cancelled (timeout or user stop)") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + # ====== UNIVERSAL EXTRACTION INTEGRATION ====== + # Use universal extraction instead of assuming response.content exists + extracted_content = "" + finish_reason = 'stop' + + if response: + # Prepare provider-specific parameters + extraction_kwargs = {} + + # Add provider-specific parameters if applicable + extraction_kwargs.update(self._get_extraction_kwargs()) + +# Try universal extraction with provider-specific parameters + extracted_content, finish_reason = self._extract_response_text( + response, + provider=getattr(self, 'client_type', 'unknown'), + **extraction_kwargs + ) + + # If extraction failed but we have a response object + if not extracted_content and response: + print(f"⚠️ Failed to extract text from {getattr(self, 'client_type', 'unknown')} response") + print(f" Response type: {type(response)}") + + # Provider-specific guidance + if getattr(self, 'client_type', None) == 'gemini': + print(f" Consider checking Gemini response structure") + print(f" Response attributes: {dir(response)[:5]}...") # Show first 5 attributes + else: + print(f" Consider checking response extraction for this provider") + + # Log the response structure for debugging + self._save_failed_request(messages, "Extraction failed", context, response) + + # Check if response has any common attributes we missed + if hasattr(response, 'content') and response.content: + extracted_content = str(response.content) + print(f" Fallback: Using response.content directly") + elif hasattr(response, 'text') and response.text: + extracted_content = str(response.text) + print(f" Fallback: Using response.text directly") + + # Update response object with extracted content + if extracted_content and hasattr(response, 'content'): + response.content = extracted_content + elif extracted_content: + # Create a new response object if needed + response = UnifiedResponse( + content=extracted_content, + finish_reason=finish_reason, + raw_response=response + ) + + # CRITICAL: Save response for duplicate detection + # This must happen even for truncated/empty responses + if extracted_content: + self._save_response(extracted_content, response_name) + + # Handle empty responses + if not extracted_content or extracted_content.strip() in ["", "[]", "[IMAGE TRANSLATION FAILED]"]: + is_likely_safety_filter = self._detect_safety_filter(messages, extracted_content, finish_reason, response, getattr(self, 'client_type', 'unknown')) + if is_likely_safety_filter and not main_key_attempted and self._multi_key_mode and getattr(self, 'original_api_key', None) and getattr(self, 'original_model', None): + main_key_attempted = True + try: + retry_res = self._retry_with_main_key(messages, temperature, max_tokens, max_completion_tokens, context, request_id=request_id, image_data=image_data) + if retry_res: + content, fr = retry_res + if content and content.strip() and len(content) > 10: + return content, fr + except Exception: + pass + # Finalize empty handling + req_type = 'image' if image_data else 'text' + return self._finalize_empty_response(messages, context, response, extracted_content or "", finish_reason, getattr(self, 'client_type', 'unknown'), req_type, start_time) + + # Track success + 
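+                # Reaching this point means extraction produced usable content; empty
+                # responses were already handled above via _finalize_empty_response.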
self._track_stats(context, True, None, time.time() - start_time) + + # Mark key as successful in multi-key mode + self._mark_key_success() + + # Check for truncation and handle retry if enabled + if finish_reason in ['length', 'max_tokens']: + print(f"Response was truncated: {finish_reason}") + print(f"⚠️ Response truncated (finish_reason: {finish_reason})") + + # ALWAYS log truncation failures + self._log_truncation_failure( + messages=messages, + response_content=extracted_content, + finish_reason=finish_reason, + context=context, + error_details=getattr(response, 'error_details', None) if response else None + ) + + # Check if retry on truncation is enabled + retry_truncated_enabled = os.getenv("RETRY_TRUNCATED", "0") == "1" + + if retry_truncated_enabled: + print(f" 🔄 RETRY_TRUNCATED enabled - attempting to retry with increased token limit") + + # Get the max retry tokens limit + max_retry_tokens = int(os.getenv("MAX_RETRY_TOKENS", "16384")) + current_max_tokens = max_tokens or 8192 + + if current_max_tokens < max_retry_tokens: + new_max_tokens = min(current_max_tokens * 2, max_retry_tokens) + print(f" 📊 Retrying with increased tokens: {current_max_tokens} → {new_max_tokens}") + + try: + # Recursive call with increased token limit + retry_content, retry_finish_reason = self._send_internal( + messages=messages, + temperature=temperature, + max_tokens=new_max_tokens, + max_completion_tokens=max_completion_tokens, + context=context, + retry_reason=f"truncation_retry_{finish_reason}", + request_id=request_id, + image_data=image_data + ) + + # Check if retry succeeded (not truncated) + if retry_finish_reason not in ['length', 'max_tokens']: + print(f" ✅ Truncation retry succeeded: {len(retry_content)} chars") + return retry_content, retry_finish_reason + else: + print(f" ⚠️ Retry was also truncated, returning original response") + + except Exception as retry_error: + print(f" ❌ Truncation retry failed: {retry_error}") + else: + print(f" 📊 Already at max retry tokens ({current_max_tokens}), not retrying") + else: + print(f" 📋 RETRY_TRUNCATED disabled - accepting truncated response") + + # Apply API delay after successful call (even if truncated) + # SKIP DELAY DURING CLEANUP + + self._apply_api_delay() + + # Brief stability pause after API call completion + if not getattr(self, '_in_cleanup', False): + time.sleep(0.1) # System stability pause after API completion + + # If the provider signaled a content filter, elevate to prohibited_content to trigger retries + if finish_reason == 'content_filter': + raise UnifiedClientError( + "Content blocked by provider", + error_type="prohibited_content", + http_status=400 + ) + + # Return the response with accurate finish_reason + # This is CRITICAL for retry mechanisms to work + return extracted_content, finish_reason + + except UnifiedClientError as e: + # Handle cancellation specially for timeout support + if e.error_type == "cancelled" or "cancelled" in str(e): + self._in_cleanup = False # Ensure cleanup flag is set + if not self._is_stop_requested(): + logger.info("Propagating cancellation to caller") + # Re-raise so send_with_interrupt can handle it + raise + + print(f"UnifiedClient error: {e}") + + # Check if it's a rate limit error - handle according to mode + error_str = str(e).lower() + if self._is_rate_limit_error(e): + # In multi-key mode, always re-raise to let _send_core handle key rotation + if self._multi_key_mode: + print(f"🔄 Rate limit error - multi-key mode active, re-raising for key rotation") + raise + + # In single-key mode, check 
if indefinite retry is enabled + indefinite_retry_enabled = os.getenv("INDEFINITE_RATE_LIMIT_RETRY", "1") == "1" + + if indefinite_retry_enabled: + # Calculate wait time from Retry-After header if available + retry_after_seconds = 60 # Default wait time + if hasattr(e, 'http_status') and e.http_status == 429: + # Try to extract Retry-After from the error if it contains header info + error_details = str(e) + if 'retry-after' in error_details.lower(): + import re + match = re.search(r'retry-after[:\s]+([0-9]+)', error_details.lower()) + if match: + retry_after_seconds = int(match.group(1)) + + # Add some jitter and cap the wait time + wait_time = min(retry_after_seconds + random.uniform(1, 10), 300) # Max 5 minutes + + print(f"🔄 Rate limit error - single-key indefinite retry, waiting {wait_time:.1f}s (attempt {attempt + 1}/{internal_retries})") + + # Wait with cancellation check + wait_start = time.time() + while time.time() - wait_start < wait_time: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + + # For rate limit errors, continue retrying without counting against max retries + # Reset attempt counter to avoid exhausting retries on rate limits + attempt = max(0, attempt - 1) # Don't count rate limit waits against retry budget + continue # Retry the attempt + else: + print(f"❌ Rate limit error - single-key mode, indefinite retry disabled, re-raising") + raise + + # Check for prohibited content — treat any HTTP 400 as prohibited to force fallback + if ( + e.error_type == "prohibited_content" + or getattr(e, 'http_status', None) == 400 + or " 400 " in error_str + or self._detect_safety_filter(messages, extracted_content or "", finish_reason, None, getattr(self, 'client_type', 'unknown')) + ): + print(f"❌ Prohibited content detected: {error_str[:200]}") + + # Different behavior based on mode + if self._multi_key_mode: + # Multi-key mode: Attempt main key retry once, then fall through to fallback + if not main_key_attempted: + main_key_attempted = True + retry_res = self._maybe_retry_main_key_on_prohibited( + messages, temperature, max_tokens, max_completion_tokens, context, request_id=request_id, image_data=image_data + ) + if retry_res: + res_content, res_fr = retry_res + if res_content and res_content.strip(): + return res_content, res_fr + else: + # Single-key mode: Check if fallback keys are enabled + use_fallback_keys = os.getenv('USE_FALLBACK_KEYS', '0') == '1' + if use_fallback_keys: + print(f"[FALLBACK DIRECT] Using fallback keys") + # Try fallback keys directly without retrying main key + retry_res = self._try_fallback_keys_direct( + messages, temperature, max_tokens, max_completion_tokens, context, request_id=request_id, image_data=image_data + ) + if retry_res: + res_content, res_fr = retry_res + if res_content and res_content.strip(): + return res_content, res_fr + else: + print(f"[SINGLE-KEY MODE] Fallback keys disabled - no retry available") + + # Fallthrough: record and return generic fallback + self._save_failed_request(messages, e, context) + self._track_stats(context, False, type(e).__name__, time.time() - start_time) + fallback_content = self._handle_empty_result(messages, context, str(e)) + return fallback_content, 'error' + + # Check for retryable server errors (500, 502, 503, 504) + http_status = getattr(e, 'http_status', None) + retryable_errors = ["500", "502", "503", "504", "api_error", "internal server error", "bad gateway", "service unavailable", "gateway timeout"] + + if (http_status in 
[500, 502, 503, 504] or + any(err in error_str for err in retryable_errors)): + if attempt < internal_retries - 1: + # In multi-key mode, try rotating keys before backing off + if self._multi_key_mode and attempt > 0: # Only after first attempt + try: + print(f"🔄 Server error ({http_status or 'API error'}) - attempting key rotation (multi-key mode)") + self._handle_rate_limit_for_thread() + print(f"🔄 Server error retry: Key rotation completed, retrying immediately...") + time.sleep(1) # Brief pause after key rotation + continue # Retry with new key immediately + except Exception as rotation_error: + print(f"❌ Key rotation failed during server error: {rotation_error}") + # Fall back to normal exponential backoff + + # Exponential backoff with jitter + delay = self._compute_backoff(attempt, base_delay, 60) # Max 60 seconds + + print(f"🔄 Server error ({http_status or 'API error'}) - auto-retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries})") + + # Wait with cancellation check + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) # Check every 0.5 seconds + print(f"🔄 Server error retry: Backoff completed, initiating retry attempt...") + time.sleep(1) # Brief pause after backoff for retry stability + continue # Retry the attempt + else: + print(f"❌ Server error ({http_status or 'API error'}) - exhausted {internal_retries} retries") + + # Check for other retryable errors (timeouts, connection issues) + timeout_errors = ["timeout", "timed out", "connection reset", "connection aborted", "connection error", "network error"] + if any(err in error_str for err in timeout_errors): + if attempt < internal_retries - 1: + delay = self._compute_backoff(attempt, base_delay/2, 30) # Shorter delay for timeouts + + print(f"🔄 Network/timeout error - retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries})") + + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + print(f"🔄 Timeout error retry: Backoff completed, initiating retry attempt...") + time.sleep(0.1) # Brief pause after backoff for retry stability + continue # Retry the attempt + else: + print(f"❌ Network/timeout error - exhausted {internal_retries} retries") + + # If we get here, this is the last attempt or a non-retryable error + # Save failed request and return fallback only if we've exhausted retries + if attempt >= internal_retries - 1: + print(f"❌ Final attempt failed, returning fallback response") + self._save_failed_request(messages, e, context) + self._track_stats(context, False, type(e).__name__, time.time() - start_time) + fallback_content = self._handle_empty_result(messages, context, str(e)) + return fallback_content, 'error' + else: + # For other errors, try again with a short delay + delay = self._compute_backoff(attempt, base_delay/4, 15) # Short delay for other errors + print(f"🔄 API error - retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries}): {str(e)[:100]}") + + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + print(f"🔄 API error retry: Backoff completed, initiating retry attempt...") + time.sleep(0.1) # Brief pause after backoff for retry stability + continue # Retry the attempt + + 
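+            # Anything that is not raised as a UnifiedClientError (raw SDK exceptions,
+            # attribute/NoneType errors, network failures) falls through to the generic
+            # handler below, which mirrors the same retry-and-backoff shape.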
except Exception as e: + # COMPREHENSIVE ERROR HANDLING FOR NoneType and other issues + error_str = str(e).lower() + print(f"Unexpected error: {e}") + # Save unexpected error details to Payloads/failed_requests + try: + self._save_failed_request(messages, e, context) + except Exception: + pass + + # Special handling for NoneType length errors + if "nonetype" in error_str and "len" in error_str: + print(f"🚨 Detected NoneType length error - likely caused by None message content") + print(f"🔍 Error details: {type(e).__name__}: {e}") + print(f"🔍 Context: {context}, Messages count: {self._safe_len(messages, 'unexpected_error_messages')}") + + # Log the actual traceback for debugging + import traceback + print(f"🔍 Traceback: {traceback.format_exc()}") + + # Return a safe fallback + self._save_failed_request(messages, e, context) + self._track_stats(context, False, "nonetype_length_error", time.time() - start_time) + fallback_content = self._handle_empty_result(messages, context, "NoneType length error") + return fallback_content, 'error' + + # For unexpected errors, check if it's a timeout + if "timed out" in error_str: + # Re-raise timeout errors so the retry logic can handle them + raise UnifiedClientError(f"Request timed out: {e}", error_type="timeout") + + # Check if it's a rate limit error - handle according to mode + if self._is_rate_limit_error(e): + # In multi-key mode, always re-raise to let _send_core handle key rotation + if self._multi_key_mode: + print(f"🔄 Unexpected rate limit error - multi-key mode active, re-raising for key rotation") + raise + + # In single-key mode, check if indefinite retry is enabled + indefinite_retry_enabled = os.getenv("INDEFINITE_RATE_LIMIT_RETRY", "1") == "1" + + if indefinite_retry_enabled: + # Calculate wait time from Retry-After header if available + retry_after_seconds = 60 # Default wait time + if hasattr(e, 'http_status') and e.http_status == 429: + # Try to extract Retry-After from the error if it contains header info + error_details = str(e) + if 'retry-after' in error_details.lower(): + import re + match = re.search(r'retry-after[:\s]+([0-9]+)', error_details.lower()) + if match: + retry_after_seconds = int(match.group(1)) + + # Add some jitter and cap the wait time + wait_time = min(retry_after_seconds + random.uniform(1, 10), 300) # Max 5 minutes + + print(f"🔄 Unexpected rate limit error - single-key indefinite retry, waiting {wait_time:.1f}s (attempt {attempt + 1}/{internal_retries})") + + # Wait with cancellation check + wait_start = time.time() + while time.time() - wait_start < wait_time: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + + # For rate limit errors, continue retrying without counting against max retries + # Reset attempt counter to avoid exhausting retries on rate limits + attempt = max(0, attempt - 1) # Don't count rate limit waits against retry budget + continue # Retry the attempt + else: + print(f"❌ Unexpected rate limit error - single-key mode, indefinite retry disabled, re-raising") + raise # Re-raise for higher-level handling + + # Check for prohibited content in unexpected errors + if self._detect_safety_filter(messages, extracted_content or "", finish_reason, None, getattr(self, 'client_type', 'unknown')): + print(f"❌ Content prohibited in unexpected error: {error_str[:200]}") + + # If we're in multi-key mode and haven't tried the main key yet + if (self._multi_key_mode and not main_key_attempted and getattr(self, 'original_api_key', None) and 
getattr(self, 'original_model', None)): + main_key_attempted = True + try: + retry_res = self._retry_with_main_key(messages, temperature, max_tokens, max_completion_tokens, context) + if retry_res: + content, fr = retry_res + return content, fr + except Exception: + pass + + # Fall through to normal error handling + print(f"❌ Content prohibited - not retrying") + self._save_failed_request(messages, e, context) + self._track_stats(context, False, "unexpected_error", time.time() - start_time) + fallback_content = self._handle_empty_result(messages, context, str(e)) + return fallback_content, 'error' + + # Check for retryable server errors + retryable_server_errors = ["500", "502", "503", "504", "internal server error", "bad gateway", "service unavailable", "gateway timeout"] + if any(err in error_str for err in retryable_server_errors): + if attempt < internal_retries - 1: + # In multi-key mode, try rotating keys before backing off + if self._multi_key_mode and attempt > 0: # Only after first attempt + try: + print(f"🔄 Unexpected server error - attempting key rotation (multi-key mode)") + self._handle_rate_limit_for_thread() + print(f"🔄 Unexpected server error retry: Key rotation completed, retrying immediately...") + time.sleep(0.1) # Brief pause after key rotation + continue # Retry with new key immediately + except Exception as rotation_error: + print(f"❌ Key rotation failed during unexpected server error: {rotation_error}") + # Fall back to normal exponential backoff + + # Exponential backoff with jitter + delay = self._compute_backoff(attempt, base_delay, 60) # Max 60 seconds + + print(f"🔄 Server error - auto-retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries})") + + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + continue # Retry the attempt + + # Check for other transient errors with exponential backoff + transient_errors = ["connection reset", "connection aborted", "connection error", "network error", "timeout", "timed out"] + if any(err in error_str for err in transient_errors): + if attempt < internal_retries - 1: + # In multi-key mode, try rotating keys for network issues + if self._multi_key_mode and attempt > 0: # Only after first attempt + try: + print(f"🔄 Transient error - attempting key rotation (multi-key mode)") + self._handle_rate_limit_for_thread() + print(f"🔄 Transient error retry: Key rotation completed, retrying immediately...") + time.sleep(0.1) # Brief pause after key rotation + continue # Retry with new key immediately + except Exception as rotation_error: + print(f"❌ Key rotation failed during transient error: {rotation_error}") + # Fall back to normal exponential backoff + + # Use a slightly less aggressive backoff for transient errors + delay = self._compute_backoff(attempt, base_delay/2, 30) # Max 30 seconds + + print(f"🔄 Transient error - retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries})") + + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + continue # Retry the attempt + + # If we get here, either we've exhausted retries or it's a non-retryable error + if attempt >= internal_retries - 1: + print(f"❌ Unexpected error - final attempt failed, returning fallback") + self._save_failed_request(messages, e, context) + self._track_stats(context, False, 
"unexpected_error", time.time() - start_time) + fallback_content = self._handle_empty_result(messages, context, str(e)) + return fallback_content, 'error' + else: + # In multi-key mode, try rotating keys before short backoff + if self._multi_key_mode and attempt > 0: # Only after first attempt + try: + print(f"🔄 Other error - attempting key rotation (multi-key mode)") + self._handle_rate_limit_for_thread() + print(f"🔄 Other error retry: Key rotation completed, retrying immediately...") + time.sleep(0.1) # Brief pause after key rotation + continue # Retry with new key immediately + except Exception as rotation_error: + print(f"❌ Key rotation failed during other error: {rotation_error}") + # Fall back to normal exponential backoff + + # For other unexpected errors, try again with a short delay + delay = self._compute_backoff(attempt, base_delay/4, 15) # Short delay + print(f"🔄 Unexpected error - retrying in {delay:.1f}s (attempt {attempt + 1}/{internal_retries}): {str(e)[:100]}") + + wait_start = time.time() + while time.time() - wait_start < delay: + if self._cancelled: + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + time.sleep(0.5) + continue # Retry the attempt + + + def _retry_with_main_key(self, messages, temperature=None, max_tokens=None, + max_completion_tokens=None, context=None, + request_id=None, image_data=None) -> Optional[Tuple[str, Optional[str]]]: + """ + Unified retry method for both text and image requests with main/fallback keys. + Pass image_data=None for text requests, or image bytes/base64 for image requests. + Returns None when fallbacks are disabled. + """ + # Determine if this is an image request + is_image_request = image_data is not None + + # THREAD-SAFE RECURSION CHECK: Use thread-local storage + tls = self._get_thread_local_client() + + # Check if THIS THREAD is already in a retry (unified check for both text and image) + retry_flag = 'in_image_retry' if image_data else 'in_retry' + if getattr(tls, retry_flag, False): + retry_type = "IMAGE " if image_data else "" + print(f"[{retry_type}MAIN KEY RETRY] Thread {threading.current_thread().name} already in retry, preventing recursion") + return None + + # CHECK: Verify multi-key mode is actually enabled + if not self._multi_key_mode: + print(f"[MAIN KEY RETRY] Not in multi-key mode, skipping retry") + return None + + # CHECK: Multi-key mode is already verified above via self._multi_key_mode + # DO NOT gate main-GUI-key retry on fallback toggle; only use toggle for additional fallback keys + use_fallback_keys = os.getenv('USE_FALLBACK_KEYS', '0') == '1' + + # CHECK: Verify we have the necessary attributes + if not (hasattr(self, 'original_api_key') and + hasattr(self, 'original_model') and + self.original_api_key and + self.original_model): + print(f"[MAIN KEY RETRY] Missing original key/model attributes, skipping retry") + return None + + # Mark THIS THREAD as being in retry + setattr(tls, retry_flag, True) + + try: + fallback_keys = [] + + # FIRST: Always add the MAIN GUI KEY as the first fallback + fallback_keys.append({ + 'api_key': self.original_api_key, + 'model': self.original_model, + 'label': 'MAIN GUI KEY' + }) + print(f"[MAIN KEY RETRY] Using main GUI key with model: {self.original_model}") + + # Add configured fallback keys only if toggle is enabled + fallback_keys_json = os.getenv('FALLBACK_KEYS', '[]') + + if use_fallback_keys and fallback_keys_json != '[]': + try: + configured_fallbacks = json.loads(fallback_keys_json) + print(f"[DEBUG] Loaded 
{len(configured_fallbacks)} fallback keys from environment") + for fb in configured_fallbacks: + fallback_keys.append({ + 'api_key': fb.get('api_key'), + 'model': fb.get('model'), + 'google_credentials': fb.get('google_credentials'), + 'azure_endpoint': fb.get('azure_endpoint'), + 'google_region': fb.get('google_region'), + 'label': 'FALLBACK KEY' + }) + except Exception as e: + print(f"[DEBUG] Failed to parse FALLBACK_KEYS: {e}") + elif not use_fallback_keys: + print("[MAIN KEY RETRY] Fallback keys toggle is OFF — will try main GUI key only") + + print(f"[MAIN KEY RETRY] Total keys to try: {len(fallback_keys)}") + + # Try each fallback key in the list + max_attempts = min(len(fallback_keys), self._get_max_retries()) + for idx, fallback_data in enumerate(fallback_keys[:max_attempts]): + label = fallback_data.get('label', 'Fallback') + fallback_key = fallback_data.get('api_key') + fallback_model = fallback_data.get('model') + fallback_google_creds = fallback_data.get('google_credentials') + fallback_azure_endpoint = fallback_data.get('azure_endpoint') + fallback_google_region = fallback_data.get('google_region') + + print(f"[{label} {idx+1}/{max_attempts}] Trying {fallback_model}") + print(f"[{label} {idx+1}] Failed multi-key model was: {self.model}") + + try: + # Create a new temporary UnifiedClient instance with the fallback key + temp_client = UnifiedClient( + api_key=fallback_key, + model=fallback_model, + output_dir=self.output_dir + ) + + # Set key-specific credentials for the temp client + if fallback_google_creds: + temp_client.current_key_google_creds = fallback_google_creds + temp_client.google_creds_path = fallback_google_creds + print(f"[{label} {idx+1}] Using fallback Google credentials: {os.path.basename(fallback_google_creds)}") + + if fallback_google_region: + temp_client.current_key_google_region = fallback_google_region + print(f"[{label} {idx+1}] Using fallback Google region: {fallback_google_region}") + + if fallback_azure_endpoint: + temp_client.current_key_azure_endpoint = fallback_azure_endpoint + # Set up Azure-specific configuration + temp_client.is_azure = True + temp_client.azure_endpoint = fallback_azure_endpoint + temp_client.azure_api_version = os.getenv('AZURE_API_VERSION', '2024-08-01-preview') + print(f"[{label} {idx+1}] Using fallback Azure endpoint: {fallback_azure_endpoint}") + print(f"[{label} {idx+1}] Azure API version: {temp_client.azure_api_version}") + + # Don't override with main client's base_url if we have fallback Azure endpoint + if hasattr(self, 'base_url') and self.base_url and not fallback_azure_endpoint: + temp_client.base_url = self.base_url + temp_client.openai_base_url = self.base_url + + if hasattr(self, 'api_version') and not fallback_azure_endpoint: + temp_client.api_version = self.api_version + + # Only inherit Azure settings if fallback doesn't have its own Azure endpoint + if hasattr(self, 'is_azure') and self.is_azure and not fallback_azure_endpoint: + temp_client.is_azure = self.is_azure + temp_client.azure_endpoint = getattr(self, 'azure_endpoint', None) + temp_client.azure_api_version = getattr(self, 'azure_api_version', '2024-08-01-preview') + + # Force the client to reinitialize with Azure settings + temp_client._setup_client() + + # FORCE single-key mode after initialization + temp_client._multi_key_mode = False + temp_client.use_multi_keys = False + temp_client.key_identifier = f"{label} ({fallback_model})" + temp_client._is_retry_client = True + + # The client should already be set up from __init__, but verify + if not 
hasattr(temp_client, 'client_type') or temp_client.client_type is None: + temp_client.api_key = fallback_key + temp_client.model = fallback_model + temp_client._setup_client() + + # Copy relevant state BUT NOT THE CANCELLATION FLAG + temp_client.context = context + temp_client._cancelled = False + temp_client._in_cleanup = False + temp_client.current_session_context = self.current_session_context + temp_client.conversation_message_count = self.conversation_message_count + temp_client.request_timeout = self.request_timeout + + print(f"[{label} {idx+1}] Created temp client with model: {temp_client.model}") + print(f"[{label} {idx+1}] Multi-key mode: {temp_client._multi_key_mode}") + + # Get file names for response tracking + payload_name, response_name = self._get_file_names(messages, context=context) + + request_type = "image " if image_data else "" + print(f"[{label} {idx+1}] Sending {request_type}request...") + + # Use unified internal method to avoid nested retry loops + result = temp_client._send_internal( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + max_completion_tokens=max_completion_tokens, + context=context, + retry_reason=f"{request_type.replace(' ', '')}{label.lower().replace(' ', '_')}_{idx+1}", + request_id=request_id, + image_data=image_data + ) + + # Check the result + if result and isinstance(result, tuple): + content, finish_reason = result + + # Check if content is an error message + if content and "[AI RESPONSE UNAVAILABLE]" in content: + print(f"[{label} {idx+1}] ❌ Got error message: {content}") + continue + + # Check if content is valid - FIX: Add None check + if content and self._safe_len(content, "main_key_retry_content") > 50: + print(f"[{label} {idx+1}] ✅ SUCCESS! Got content of length: {len(content)}") + self._save_response(content, response_name) + return content, finish_reason + else: + print(f"[{label} {idx+1}] ❌ Content too short or empty: {len(content) if content else 0} chars") + continue + else: + print(f"[{label} {idx+1}] ❌ Unexpected result type: {type(result)}") + continue + + except UnifiedClientError as e: + if e.error_type == "cancelled": + print(f"[{label} {idx+1}] Operation was cancelled during retry") + return None + + error_str = str(e).lower() + if ("azure" in error_str and "content" in error_str) or e.error_type == "prohibited_content": + print(f"[{label} {idx+1}] ❌ Content filter error: {str(e)[:100]}") + continue + + print(f"[{label} {idx+1}] ❌ UnifiedClientError: {str(e)[:200]}") + continue + + except Exception as e: + print(f"[{label} {idx+1}] ❌ Exception: {str(e)[:200]}") + continue + + print(f"[MAIN KEY RETRY] ❌ All {max_attempts} fallback keys failed") + return None + + finally: + # ALWAYS clear the thread-local flag + setattr(tls, retry_flag, False) + + def _try_fallback_keys_direct(self, messages, temperature=None, max_tokens=None, + max_completion_tokens=None, context=None, + request_id=None, image_data=None) -> Optional[Tuple[str, Optional[str]]]: + """ + Try fallback keys directly in single-key mode without retrying main key. + Used when fallback keys are enabled in single-key mode. 
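+
+        Expected FALLBACK_KEYS shape (illustrative values only; the field names mirror
+        what the parser below reads, and everything except api_key/model is optional):
+
+            USE_FALLBACK_KEYS=1
+            FALLBACK_KEYS='[{"api_key": "sk-...", "model": "gpt-4.1-mini",
+                             "azure_endpoint": null, "google_credentials": null,
+                             "google_region": null, "azure_api_version": null}]'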
+ """ + # Check if fallback keys are enabled + use_fallback_keys = os.getenv('USE_FALLBACK_KEYS', '0') == '1' + if not use_fallback_keys: + print(f"[FALLBACK DIRECT] Fallback keys not enabled, skipping") + return None + + # Load fallback keys from environment + fallback_keys_json = os.getenv('FALLBACK_KEYS', '[]') + if fallback_keys_json == '[]': + print(f"[FALLBACK DIRECT] No fallback keys configured") + return None + + try: + configured_fallbacks = json.loads(fallback_keys_json) + print(f"[FALLBACK DIRECT] Loaded {len(configured_fallbacks)} fallback keys") + + # Try each fallback key + max_attempts = min(len(configured_fallbacks), 3) # Limit attempts + for idx, fb in enumerate(configured_fallbacks[:max_attempts]): + fallback_key = fb.get('api_key') + fallback_model = fb.get('model') + fallback_google_creds = fb.get('google_credentials') + fallback_azure_endpoint = fb.get('azure_endpoint') + fallback_google_region = fb.get('google_region') + fallback_azure_api_version = fb.get('azure_api_version') + + if not fallback_key or not fallback_model: + print(f"[FALLBACK DIRECT {idx+1}] Invalid key data, skipping") + continue + + print(f"[FALLBACK DIRECT {idx+1}/{max_attempts}] Trying {fallback_model}") + + try: + # Create temporary client for fallback key + temp_client = UnifiedClient( + api_key=fallback_key, + model=fallback_model, + output_dir=self.output_dir + ) + + # Set key-specific credentials + if fallback_google_creds: + temp_client.current_key_google_creds = fallback_google_creds + temp_client.google_creds_path = fallback_google_creds + print(f"[FALLBACK DIRECT {idx+1}] Using Google credentials: {os.path.basename(fallback_google_creds)}") + + if fallback_google_region: + temp_client.current_key_google_region = fallback_google_region + print(f"[FALLBACK DIRECT {idx+1}] Using Google region: {fallback_google_region}") + + if fallback_azure_endpoint: + temp_client.current_key_azure_endpoint = fallback_azure_endpoint + # Set up Azure-specific configuration + temp_client.is_azure = True + temp_client.azure_endpoint = fallback_azure_endpoint + # Use per-key Azure API version, fallback to environment, then default + temp_client.azure_api_version = fallback_azure_api_version or os.getenv('AZURE_API_VERSION', '2025-01-01-preview') + print(f"[FALLBACK DIRECT {idx+1}] Using Azure endpoint: {fallback_azure_endpoint}") + print(f"[FALLBACK DIRECT {idx+1}] Azure API version: {temp_client.azure_api_version}") + + # Force single-key mode + temp_client._multi_key_mode = False + temp_client.key_identifier = f"FALLBACK KEY ({fallback_model})" + temp_client._is_retry_client = True + + # Setup the client + temp_client._setup_client() + + # Copy relevant state + temp_client.context = context + temp_client._cancelled = False + temp_client._in_cleanup = False + temp_client.current_session_context = self.current_session_context + temp_client.conversation_message_count = self.conversation_message_count + temp_client.request_timeout = self.request_timeout + + print(f"[FALLBACK DIRECT {idx+1}] Sending request...") + + # Use internal method to avoid nested retry loops + result = temp_client._send_internal( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + max_completion_tokens=max_completion_tokens, + context=context, + retry_reason=f"single_key_fallback_{idx+1}", + request_id=request_id, + image_data=image_data + ) + + # Check the result + if result and isinstance(result, tuple): + content, finish_reason = result + + # Check if content is valid + if content and "[AI RESPONSE UNAVAILABLE]" 
not in content and len(content) > 50: + print(f"[FALLBACK DIRECT {idx+1}] ✅ SUCCESS! Got content of length: {len(content)}") + return content, finish_reason + else: + print(f"[FALLBACK DIRECT {idx+1}] ❌ Content too short or error: {len(content) if content else 0} chars") + continue + else: + print(f"[FALLBACK DIRECT {idx+1}] ❌ Unexpected result type: {type(result)}") + continue + + except Exception as e: + print(f"[FALLBACK DIRECT {idx+1}] ❌ Failed: {e}") + continue + + print(f"[FALLBACK DIRECT] All fallback keys failed") + return None + + except Exception as e: + print(f"[FALLBACK DIRECT] Failed to parse fallback keys: {e}") + return None + + # Image handling methods + def send_image(self, messages: List[Dict[str, Any]], image_data: Any, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + max_completion_tokens: Optional[int] = None, + context: str = 'image_translation') -> Tuple[str, str]: + """Backwards-compatible public API; now delegates to unified _send_core.""" + return self._send_core(messages, temperature, max_tokens, max_completion_tokens, context, image_data=image_data) + + def _send_image_internal(self, messages: List[Dict[str, Any]], image_data: Any, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + max_completion_tokens: Optional[int] = None, + context: str = 'image_translation', + retry_reason: Optional[str] = None, + request_id=None) -> Tuple[str, str]: + """ + Image send internal - backwards compatibility wrapper + """ + return self._send_internal( + messages, temperature, max_tokens, max_completion_tokens, + context or 'image_translation', retry_reason, request_id, image_data=image_data + ) + + def _prepare_image_messages(self, messages: List[Dict[str, Any]], image_data: Any) -> List[Dict[str, Any]]: + """ + Helper method to prepare messages with embedded image for providers that accept image_url parts + """ + embedded_messages = [] + # Prepare base64 string + try: + if isinstance(image_data, (bytes, bytearray)): + b64 = base64.b64encode(image_data).decode('ascii') + else: + b64 = str(image_data) + except Exception: + b64 = str(image_data) + + image_part = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}} + + for msg in messages: + if msg.get('role') == 'user': + content = msg.get('content', '') + if isinstance(content, list): + new_parts = list(content) + new_parts.append(image_part) + embedded_messages.append({"role": "user", "content": new_parts}) + else: + embedded_messages.append({ + "role": "user", + "content": [ + {"type": "text", "text": content}, + image_part + ] + }) + else: + embedded_messages.append(msg) + + if not any(m.get('role') == 'user' for m in embedded_messages): + embedded_messages.append({"role": "user", "content": [image_part]}) + + return embedded_messages + + def _retry_image_with_main_key(self, messages, image_data, temperature=None, max_tokens=None, + max_completion_tokens=None, context=None, request_id=None) -> Optional[Tuple[str, Optional[str]]]: + """ + Image retry method - backwards compatibility wrapper + """ + return self._retry_with_main_key( + messages, temperature, max_tokens, max_completion_tokens, + context or 'image_translation', request_id, image_data=image_data + ) + + def reset_conversation_for_new_context(self, new_context): + """Reset conversation state when context changes""" + with self._model_lock: + self.current_session_context = new_context + self.conversation_message_count = 0 + self.pattern_counts.clear() + self.last_pattern = None + + 
logger.info(f"Reset conversation state for new context: {new_context}") + + def _apply_pure_reinforcement(self, messages): + """Apply PURE frequency-based reinforcement pattern""" + + # DISABLE in batch mode + #if os.getenv('BATCH_TRANSLATION', '0') == '1': + # return messages + + # Skip if not enough messages + if self.conversation_message_count < 4: + return messages + + # Create pattern from last 2 user messages + if len(messages) >= 2: + pattern = [] + for msg in messages[-2:]: + if msg.get('role') == 'user': + content = msg['content'] + pattern.append(len(content)) + + if len(pattern) >= 2: + pattern_key = f"reinforcement_{pattern[0]}_{pattern[1]}" + + # MICROSECOND LOCK: When modifying pattern_counts + with self._model_lock: + self.pattern_counts[pattern_key] = self.pattern_counts.get(pattern_key, 0) + 1 + count = self.pattern_counts[pattern_key] + + # Just track patterns, NO PROMPT INJECTION + if count >= 3: + logger.info(f"Pattern {pattern_key} detected (count: {count})") + # NO [PATTERN REINFORCEMENT ACTIVE] - KEEP IT GONE + + return messages + + def _validate_and_clean_messages(self, messages): + """Validate and clean messages, removing None entries and fixing content issues""" + if messages is None: + return [] + cleaned_messages = [] + for msg in messages: + if msg is None: + continue + if not isinstance(msg, dict): + continue + # Ensure role exists and is a string + if 'role' not in msg or msg['role'] is None: + msg = dict(msg) + msg['role'] = 'user' + # Normalize content + if msg.get('content') is None: + msg = dict(msg) # Make a copy + msg['content'] = '' + cleaned_messages.append(msg) + return cleaned_messages + def _merge_system_into_user(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert all system prompts into a user message by prepending them to the first + user message, separated by a line break. If no user message exists, one is created. + Supports both simple string content and OpenAI 'content parts' lists. 
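+
+        Illustrative transformation (sketch):
+
+            [{"role": "system", "content": "You are a translator."},
+             {"role": "user", "content": "Hello"}]
+
+            becomes
+
+            [{"role": "user", "content": "You are a translator.\nHello"}]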
+ """ + if not messages: + return [] + system_texts: List[str] = [] + # Collect system texts and build the new list without system messages + pruned: List[Dict[str, Any]] = [] + for msg in messages: + role = msg.get("role") + if role == "system": + content = msg.get("content", "") + if isinstance(content, str): + if content.strip(): + system_texts.append(content.strip()) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + txt = part.get("text", "").strip() + if txt: + system_texts.append(txt) + # Skip adding this system message + continue + pruned.append(msg) + # Nothing to merge: still ensure we don't return an empty list + if not system_texts: + if not pruned: + return [{"role": "user", "content": ""}] # minimal valid user message to avoid empty list + return pruned + merged_header = "\n\n".join(system_texts).strip() + if merged_header: + merged_header += "\n" # ensure separation from current user content + # Find first user message and prepend + first_user_index = -1 + for i, m in enumerate(pruned): + if m.get("role") == "user": + first_user_index = i + break + if first_user_index >= 0: + um = pruned[first_user_index] + content = um.get("content", "") + if isinstance(content, str): + um["content"] = f"{merged_header}{content}" if merged_header else content + elif isinstance(content, list): + # If first part is text, prepend; otherwise insert a text part at the front + if content and isinstance(content[0], dict) and content[0].get("type") == "text": + content[0]["text"] = f"{merged_header}{content[0].get('text', '')}" if merged_header else content[0].get('text', '') + else: + text_part = {"type": "text", "text": merged_header or ""} + content.insert(0, text_part) + um["content"] = content + else: + # Unknown structure; coerce to string with the merged header + um["content"] = f"{merged_header}{str(content)}" + pruned[first_user_index] = um + else: + # No user message exists; create one with the merged header + pruned.append({"role": "user", "content": merged_header}) + return pruned + + def _validate_request(self, messages, max_tokens=None): + """Validate request parameters before sending""" + # Clean messages first + messages = self._validate_and_clean_messages(messages) + + if not messages: + return False, "Empty messages list" + + # Check message content isn't empty - FIX: Add None checks + total_chars = 0 + for msg in messages: + if msg is not None and msg.get('role') == 'user': + content = msg.get('content', '') + if content is not None: + total_chars += len(str(content)) + if total_chars == 0: + return False, "Empty request content" + + # Handle None max_tokens + if max_tokens is None: + max_tokens = getattr(self, 'max_tokens', 8192) # Use instance default or 8192 + + # Estimate tokens (rough approximation) + estimated_tokens = total_chars / 4 + if estimated_tokens > max_tokens * 2: + print(f"Request might be too long: ~{estimated_tokens} tokens vs {max_tokens} max") + + # Check for valid roles + valid_roles = {'system', 'user', 'assistant'} + for msg in messages: + if msg.get('role') not in valid_roles: + return False, f"Invalid role: {msg.get('role')}" + + return True, None + + def _track_stats(self, context, success, error_type=None, response_time=None): + """Track API call statistics""" + self.stats['total_requests'] += 1 + + if not success: + self.stats['empty_results'] += 1 + error_key = f"{getattr(self, 'client_type', 'unknown')}_{context}_{error_type}" + self.stats['errors'][error_key] = 
self.stats['errors'].get(error_key, 0) + 1 + + if response_time: + self.stats['response_times'].append(response_time) + + # Save stats periodically + if self.stats['total_requests'] % 10 == 0: + self._save_stats() + + def _save_stats(self): + """Save statistics to file""" + stats_file = "api_stats.json" + try: + with open(stats_file, 'w') as f: + json.dump(self.stats, f, indent=2) + except Exception as e: + print(f"Failed to save stats: {e}") + + def _save_failed_request(self, messages, error, context, response=None): + """Save failed requests for debugging""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + failed_dir = "Payloads/failed_requests" + os.makedirs(failed_dir, exist_ok=True) + + failure_data = { + 'timestamp': timestamp, + 'context': context, + 'error': str(error), + 'error_type': type(error).__name__, + 'messages': messages, + 'model': getattr(self, 'model', None), + 'client_type': getattr(self, 'client_type', None), + 'response': str(response) if response else None, + 'traceback': traceback.format_exc() + } + + filename = f"{failed_dir}/failed_{context}_{getattr(self, 'client_type', 'unknown')}_{timestamp}.json" + with open(filename, 'w', encoding='utf-8') as f: + json.dump(failure_data, f, indent=2, ensure_ascii=False) + + logger.info(f"Saved failed request to: {filename}") + + def _handle_empty_result(self, messages, context, error_info): + """Handle empty results with context-aware fallbacks""" + print(f"Handling empty result for context: {context}, error: {error_info}") + + # Log detailed error information for debugging + if isinstance(error_info, dict): + error_type = error_info.get('error', 'unknown') + error_details = error_info.get('details', '') + else: + error_type = str(error_info) + error_details = '' + + # Check if this is an extraction failure vs actual empty response + is_extraction_failure = 'extract' in error_type.lower() or 'parse' in error_type.lower() + + if context == 'glossary': + # For glossary, we might have partial data in error_info + if is_extraction_failure and isinstance(error_info, dict): + # Check if raw response is available + raw_response = error_info.get('raw_response', '') + if raw_response and 'character' in str(raw_response): + # Log that data exists but couldn't be extracted + print("⚠️ Glossary data exists in response but extraction failed!") + print(" Consider checking response extraction for this provider") + + # Return empty but valid JSON + return "[]" + + elif context == 'translation': + # Extract the original text and return it with a marker + original_text = self._extract_user_content(messages) + + # Add more specific error info if available + if is_extraction_failure: + return f"[EXTRACTION FAILED - ORIGINAL TEXT PRESERVED]\n{original_text}" + elif 'rate' in error_type.lower(): + return f"[RATE LIMITED - ORIGINAL TEXT PRESERVED]\n{original_text}" + elif 'safety' in error_type.lower() or 'prohibited' in error_type.lower(): + return f"[CONTENT BLOCKED - ORIGINAL TEXT PRESERVED]\n{original_text}" + else: + return f"[TRANSLATION FAILED - ORIGINAL TEXT PRESERVED]\n{original_text}" + + elif context == 'image_translation': + # Provide more specific error messages for image translation + if 'size' in error_type.lower(): + return "[IMAGE TOO LARGE - TRANSLATION FAILED]" + elif 'format' in error_type.lower(): + return "[UNSUPPORTED IMAGE FORMAT - TRANSLATION FAILED]" + elif is_extraction_failure: + return "[RESPONSE EXTRACTION FAILED]" + else: + return "[IMAGE TRANSLATION FAILED]" + + elif context == 'manga': + # Add 
manga-specific handling + return "[MANGA TRANSLATION FAILED]" + + elif context == 'metadata': + # For metadata extraction + return "{}" + + else: + # Generic fallback with error type + if is_extraction_failure: + return "[RESPONSE EXTRACTION FAILED]" + elif 'rate' in error_type.lower(): + return "[RATE LIMITED - PLEASE RETRY]" + else: + return "[AI RESPONSE UNAVAILABLE]" + + def _extract_response_text(self, response, provider=None, **kwargs): + """ + Universal response text extraction that works across all providers. + Includes enhanced OpenAI-specific handling and proper Gemini support. + """ + result = "" + finish_reason = 'stop' + + # Determine provider if not specified + if provider is None: + provider = self.client_type + + self._debug_log(f" 🔍 Extracting text from {provider} response...") + self._debug_log(f" 🔍 Response type: {type(response)}") + + # Handle UnifiedResponse objects + if isinstance(response, UnifiedResponse): + # Check if content is a string (even if empty) + if response.content is not None and isinstance(response.content, str): + # Always return the content from UnifiedResponse + if len(response.content) > 0: + self._debug_log(f" ✅ Got text from UnifiedResponse.content: {len(response.content)} chars") + else: + self._debug_log(f" ⚠️ UnifiedResponse has empty content (finish_reason: {response.finish_reason})") + return response.content, response.finish_reason or 'stop' + elif response.error_details: + self._debug_log(f" ⚠️ UnifiedResponse has error_details: {response.error_details}") + return "", response.finish_reason or 'error' + else: + # Only try to extract from raw_response if content is actually None + self._debug_log(f" ⚠️ UnifiedResponse.content is None, checking raw_response...") + if hasattr(response, 'raw_response') and response.raw_response: + self._debug_log(f" 🔍 Found raw_response, attempting extraction...") + response = response.raw_response + else: + self._debug_log(f" ⚠️ No raw_response found") + return "", 'error' + + # ========== GEMINI-SPECIFIC HANDLING ========== + if provider == 'gemini': + self._debug_log(f" 🔍 [Gemini] Attempting specialized extraction...") + + # Check for Gemini-specific response structure + if hasattr(response, 'candidates'): + self._debug_log(f" 🔍 [Gemini] Found candidates attribute") + if response.candidates: + candidate = response.candidates[0] + + # Check finish reason + if hasattr(candidate, 'finish_reason'): + finish_reason = str(candidate.finish_reason).lower() + self._debug_log(f" 🔍 [Gemini] Finish reason: {finish_reason}") + + # Map Gemini finish reasons + if 'max_tokens' in finish_reason: + finish_reason = 'length' + elif 'safety' in finish_reason or 'blocked' in finish_reason: + finish_reason = 'content_filter' + elif 'stop' in finish_reason: + finish_reason = 'stop' + + # Extract content from candidate + if hasattr(candidate, 'content'): + content = candidate.content + self._debug_log(f" 🔍 [Gemini] Content object: {content}") + self._debug_log(f" 🔍 [Gemini] Content type: {type(content)}") + self._debug_log(f" 🔍 [Gemini] Content attributes: {[attr for attr in dir(content) if not attr.startswith('_')][:10]}") + + # NEW: Try to access content as string directly first + try: + content_str = str(content) + if content_str and len(content_str) > 20 and 'role=' not in content_str: + self._debug_log(f" ✅ [Gemini] Got content from string conversion: {len(content_str)} chars") + return content_str, finish_reason + except Exception as e: + self._debug_log(f" ⚠️ [Gemini] String conversion failed: {e}") + + # Content might have 
parts - FIX: Add None check for parts + if hasattr(content, 'parts') and content.parts is not None: + parts_count = self._safe_len(content.parts, "gemini_content_parts") + self._debug_log(f" 🔍 [Gemini] Found {parts_count} parts in content") + text_parts = [] + + for i, part in enumerate(content.parts): + part_text = self._extract_part_text(part, provider='gemini', part_index=i+1) + if part_text: + text_parts.append(part_text) + + if text_parts: + result = ''.join(text_parts) + self._debug_log(f" ✅ [Gemini] Extracted from parts: {len(result)} chars") + return result, finish_reason + + else: + # NEW: Handle case where parts exist but contain no text + parts_count = self._safe_len(content.parts, "gemini_empty_parts") + self._debug_log(f" ⚠️ [Gemini] Parts found but no text extracted from {parts_count} parts") + # Don't return here, try other methods + + # Try direct text access on content + elif hasattr(content, 'text'): + if content.text: + self._debug_log(f" ✅ [Gemini] Got text from content.text: {len(content.text)} chars") + return content.text, finish_reason + + # NEW: Try accessing raw content data + for attr in ['text', 'content', 'data', 'message', 'response']: + if hasattr(content, attr): + try: + value = getattr(content, attr) + if value and isinstance(value, str) and len(value) > 10: + print(f" ✅ [Gemini] Got text from content.{attr}: {len(value)} chars") + return value, finish_reason + except Exception as e: + print(f" ⚠️ [Gemini] Failed to get content.{attr}: {e}") + + # Try to get text directly from candidate + if hasattr(candidate, 'text'): + if candidate.text: + print(f" ✅ [Gemini] Got text from candidate.text: {len(candidate.text)} chars") + return candidate.text, finish_reason + + # Alternative Gemini response structure (for native SDK) + if hasattr(response, 'text'): + try: + # This might be a property that needs to be called + text = response.text + if text: + print(f" ✅ [Gemini] Got text via response.text property: {len(text)} chars") + return text, finish_reason + except Exception as e: + print(f" ⚠️ [Gemini] Error accessing response.text: {e}") + + # Try parts directly on response - FIX: Add None check + if hasattr(response, 'parts') and response.parts is not None: + print(f" 🔍 [Gemini] Found parts directly on response") + text_parts = [] + for i, part in enumerate(response.parts): + part_text = self._extract_part_text(part, provider='gemini', part_index=i+1) + if part_text: + text_parts.append(part_text) + + if text_parts: + result = ''.join(text_parts) + print(f" ✅ [Gemini] Extracted from direct parts: {len(result)} chars") + return result, finish_reason + + print(f" ⚠️ [Gemini] Specialized extraction failed, trying generic methods...") + + # ========== ENHANCED OPENAI HANDLING ========== + elif provider == 'openai': + print(f" 🔍 [OpenAI] Attempting specialized extraction...") + + # Check if it's an OpenAI ChatCompletion object + if hasattr(response, 'choices') and response.choices is not None: + choices_count = self._safe_len(response.choices, "openai_response_choices") + print(f" 🔍 [OpenAI] Found choices attribute, {choices_count} choices") + + if response.choices: + choice = response.choices[0] + + # Log choice details + print(f" 🔍 [OpenAI] Choice type: {type(choice)}") + + # Get finish reason + if hasattr(choice, 'finish_reason'): + finish_reason = choice.finish_reason + print(f" 🔍 [OpenAI] Finish reason: {finish_reason}") + + # Normalize finish reasons + if finish_reason == 'max_tokens': + finish_reason = 'length' + elif finish_reason == 'content_filter': + 
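+                                # Kept explicit (effectively a no-op: the value is already
+                                # 'content_filter') so the finish-reason mapping reads completely.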
finish_reason = 'content_filter' + + # Extract message content + if hasattr(choice, 'message'): + message = choice.message + print(f" 🔍 [OpenAI] Message type: {type(message)}") + + # Check for refusal first + if hasattr(message, 'refusal') and message.refusal: + print(f" 🚫 [OpenAI] Message was refused: {message.refusal}") + return f"[REFUSED]: {message.refusal}", 'content_filter' + + # Try to get content + if hasattr(message, 'content'): + content = message.content + + # Handle None content + if content is None: + print(f" ⚠️ [OpenAI] message.content is None") + + # Check if it's a function call instead + if hasattr(message, 'function_call'): + print(f" 🔍 [OpenAI] Found function_call instead of content") + return "", 'function_call' + elif hasattr(message, 'tool_calls'): + print(f" 🔍 [OpenAI] Found tool_calls instead of content") + return "", 'tool_call' + else: + print(f" ⚠️ [OpenAI] No content, refusal, or function calls found") + return "", finish_reason or 'error' + + # Handle empty string content + elif content == "": + print(f" ⚠️ [OpenAI] message.content is empty string") + if finish_reason == 'length': + print(f" ⚠️ [OpenAI] Empty due to length limit (tokens too low)") + return "", finish_reason or 'error' + + # Valid content found + else: + print(f" ✅ [OpenAI] Got content: {len(content)} chars") + return content, finish_reason + + # Try alternative attributes + elif hasattr(message, 'text'): + print(f" 🔍 [OpenAI] Trying message.text...") + if message.text: + print(f" ✅ [OpenAI] Got text: {len(message.text)} chars") + return message.text, finish_reason + + # Try dict access if message is dict-like + elif hasattr(message, 'get'): + content = message.get('content') or message.get('text') + if content: + print(f" ✅ [OpenAI] Got content via dict access: {len(content)} chars") + return content, finish_reason + + # Log all available attributes for debugging + print(f" ⚠️ [OpenAI] Message attributes: {[attr for attr in dir(message) if not attr.startswith('_')]}") + else: + print(f" ⚠️ [OpenAI] Empty choices array") + + # Check if there's metadata about why it's empty + if hasattr(response, 'model'): + print(f" Model used: {response.model}") + if hasattr(response, 'id'): + print(f" Response ID: {response.id}") + if hasattr(response, 'usage'): + print(f" Token usage: {response.usage}") + + # If OpenAI extraction failed, continue to generic methods + print(f" ⚠️ [OpenAI] Specialized extraction failed, trying generic methods...") + + # ========== GENERIC EXTRACTION METHODS ========== + + # Method 1: Direct text attributes (common patterns) + text_attributes = ['text', 'content', 'message', 'output', 'response', 'answer', 'reply'] + + for attr in text_attributes: + if hasattr(response, attr): + try: + value = getattr(response, attr) + if value is not None and isinstance(value, str) and len(value) > 0: + result = value + print(f" ✅ Got text from response.{attr}: {len(result)} chars") + return result, finish_reason + except Exception as e: + print(f" ⚠️ Failed to get response.{attr}: {e}") + + # Method 2: Common nested patterns + nested_patterns = [ + # OpenAI/Mistral pattern + lambda r: r.choices[0].message.content if hasattr(r, 'choices') and r.choices and hasattr(r.choices[0], 'message') and hasattr(r.choices[0].message, 'content') else None, + # Alternative OpenAI pattern + lambda r: r.choices[0].text if hasattr(r, 'choices') and r.choices and hasattr(r.choices[0], 'text') else None, + # Anthropic SDK pattern + lambda r: r.content[0].text if hasattr(r, 'content') and r.content and 
hasattr(r.content[0], 'text') else None, + # Gemini pattern - candidates structure + lambda r: r.candidates[0].content.parts[0].text if hasattr(r, 'candidates') and r.candidates and hasattr(r.candidates[0], 'content') and hasattr(r.candidates[0].content, 'parts') and r.candidates[0].content.parts else None, + # Cohere pattern + lambda r: r.text if hasattr(r, 'text') else None, + # JSON response pattern + lambda r: r.get('choices', [{}])[0].get('message', {}).get('content') if isinstance(r, dict) else None, + lambda r: r.get('content') if isinstance(r, dict) else None, + lambda r: r.get('text') if isinstance(r, dict) else None, + lambda r: r.get('output') if isinstance(r, dict) else None, + ] + + for i, pattern in enumerate(nested_patterns): + try: + extracted = pattern(response) + if extracted is not None and isinstance(extracted, str) and len(extracted) > 0: + result = extracted + print(f" ✅ Extracted via nested pattern {i+1}: {len(result)} chars") + return result, finish_reason + except Exception as e: + # Log pattern failures for debugging + if provider in ['openai', 'gemini'] and i < 4: # First patterns are provider-specific + print(f" ⚠️ [{provider}] Pattern {i+1} failed: {e}") + + # Method 3: String representation extraction (last resort) + if not result: + print(f" 🔍 Attempting string extraction as last resort...") + result = self._extract_from_string(response, provider=provider) + if result: + print(f" 🔧 Extracted from string representation: {len(result)} chars") + return result, finish_reason + + # Method 4: AGGRESSIVE GEMINI FALLBACK - Parse response string manually + if provider == 'gemini' and not result: + print(f" 🔍 [Gemini] Attempting aggressive manual parsing...") + try: + response_str = str(response) + + # Look for common patterns in Gemini response strings + import re + patterns = [ + r'text["\']([^"\'].*?)["\']', # text="content" or text='content' + r'text=([^,\)\]]+)', # text=content + r'content["\']([^"\'].*?)["\']', # content="text" + r'>([^<>{},\[\]]+)<', # HTML-like tags + ] + + for pattern in patterns: + matches = re.findall(pattern, response_str, re.DOTALL) + for match in matches: + if match and len(match.strip()) > 20: + # Clean up the match + clean_match = match.strip() + clean_match = clean_match.replace('\\n', '\n').replace('\\t', '\t') + if len(clean_match) > 20: + print(f" 🔧 [Gemini] Extracted via regex pattern: {len(clean_match)} chars") + return clean_match, finish_reason + + # If no patterns match, try to find the largest text block + words = response_str.split() + text_blocks = [] + current_block = [] + + for word in words: + if len(word) > 2 and word.isalpha() or any(c.isalpha() for c in word): + current_block.append(word) + else: + if len(current_block) > 5: # At least 5 words + text_blocks.append(' '.join(current_block)) + current_block = [] + + if current_block and len(current_block) > 5: + text_blocks.append(' '.join(current_block)) + + if text_blocks: + # Return the longest text block + longest_block = max(text_blocks, key=len) + if len(longest_block) > 50: + print(f" 🔧 [Gemini] Extracted longest text block: {len(longest_block)} chars") + return longest_block, finish_reason + + except Exception as e: + print(f" ⚠️ [Gemini] Aggressive parsing failed: {e}") + + # Final failure - log detailed debug info + print(f" ❌ Failed to extract text from {provider} response") + + # Log the full response structure for debugging + print(f" 🔍 [{provider}] Full response structure:") + print(f" Type: {type(response)}") + + # Log available attributes + if 
hasattr(response, '__dict__'): + attrs = list(response.__dict__.keys())[:20] + print(f" Attributes: {attrs}") + else: + attrs = [attr for attr in dir(response) if not attr.startswith('_')][:20] + print(f" Dir attributes: {attrs}") + + # Try to get any text representation as absolute last resort + try: + response_str = str(response) + if len(response_str) > 100 and len(response_str) < 100000: # Reasonable size + print(f" 🔍 Response string representation: {response_str[:500]}...") + except: + pass + + return "", 'error' + + + def _extract_part_text(self, part, provider=None, part_index=None): + """ + Extract text from a part object (handles various formats). + Enhanced with provider-specific handling and aggressive extraction. + """ + if provider == 'gemini' and part_index: + print(f" 🔍 [Gemini] Part {part_index} type: {type(part)}") + print(f" 🔍 [Gemini] Part {part_index} attributes: {[attr for attr in dir(part) if not attr.startswith('_')][:10]}") + + # Direct text attribute + if hasattr(part, 'text'): + try: + text = part.text + if text: + if provider == 'gemini' and part_index: + print(f" ✅ [Gemini] Part {part_index} has text via direct access: {len(text)} chars") + return text + except Exception as e: + if provider == 'gemini' and part_index: + print(f" ⚠️ [Gemini] Failed direct access on part {part_index}: {e}") + + # NEW: Try direct string conversion of the part + try: + part_str = str(part) + if part_str and len(part_str) > 10 and 'text=' not in part_str.lower(): + if provider == 'gemini' and part_index: + print(f" ✅ [Gemini] Part {part_index} extracted as string: {len(part_str)} chars") + return part_str + except Exception as e: + if provider == 'gemini' and part_index: + print(f" ⚠️ [Gemini] Part {part_index} string conversion failed: {e}") + + # Use getattr with fallback + try: + text = getattr(part, 'text', None) + if text: + if provider == 'gemini' and part_index: + print(f" ✅ [Gemini] Part {part_index} has text via getattr: {len(text)} chars") + return text + except Exception as e: + if provider == 'gemini' and part_index: + print(f" ⚠️ [Gemini] Failed getattr on part {part_index}: {e}") + + # String representation extraction + part_str = str(part) + + if provider == 'gemini' and part_index: + print(f" 🔍 [Gemini] Part {part_index} string representation length: {len(part_str)}") + + if 'text=' in part_str or 'text":' in part_str: + import re + patterns = [ + r'text="""(.*?)"""', # Triple quotes (common in Gemini) + r'text="([^"]*(?:\\.[^"]*)*)"', # Double quotes with escaping + r"text='([^']*(?:\\.[^']*)*)'", # Single quotes + r'text=([^,\)]+)', # Unquoted text (last resort) + ] + + for pattern in patterns: + match = re.search(pattern, part_str, re.DOTALL) + if match: + text = match.group(1) + # Unescape common escape sequences + text = text.replace('\\n', '\n') + text = text.replace('\\t', '\t') + text = text.replace('\\r', '\r') + text = text.replace('\\"', '"') + text = text.replace("\\'", "'") + text = text.replace('\\\\', '\\') + + if provider == 'gemini' and part_index: + #print(f" 🔧 [Gemini] Part {part_index} extracted via regex pattern: {len(text)} chars") + pass + + return text + + # Part is itself a string + if isinstance(part, str): + if provider == 'gemini' and part_index: + print(f" ✅ [Gemini] Part {part_index} is a string: {len(part)} chars") + return part + + if provider == 'gemini' and part_index: + print(f" ⚠️ [Gemini] Failed string extraction on part {part_index}") + + return None + + + def _extract_from_string(self, response, provider=None): + """ + Extract 
text from string representation of response. + Enhanced with provider-specific patterns. + """ + try: + response_str = str(response) + import re + + # Common patterns in string representations + patterns = [ + r'text="""(.*?)"""', # Triple quotes (Gemini often uses this) + r'text="([^"]*(?:\\.[^"]*)*)"', # Double quotes + r"text='([^']*(?:\\.[^']*)*)'", # Single quotes + r'content="([^"]*(?:\\.[^"]*)*)"', # Content field + r'content="""(.*?)"""', # Triple quoted content + r'"text":\s*"([^"]*(?:\\.[^"]*)*)"', # JSON style + r'"content":\s*"([^"]*(?:\\.[^"]*)*)"', # JSON content + ] + + for pattern in patterns: + match = re.search(pattern, response_str, re.DOTALL) + if match: + text = match.group(1) + # Unescape common escape sequences + text = text.replace('\\n', '\n') + text = text.replace('\\t', '\t') + text = text.replace('\\r', '\r') + text = text.replace('\\"', '"') + text = text.replace("\\'", "'") + text = text.replace('\\\\', '\\') + + if provider == 'gemini': + #print(f" 🔧 [Gemini] Extracted from string using pattern: {pattern[:30]}...") + pass + + return text + except Exception as e: + if provider == 'gemini': + print(f" ⚠️ [Gemini] Error during string extraction: {e}") + else: + print(f" ⚠️ Error during string extraction: {e}") + + return None + + def _extract_user_content(self, messages): + """Extract user content from messages""" + for msg in reversed(messages): + if msg.get('role') == 'user': + return msg.get('content', '') + return '' + + def _get_file_names(self, messages, context=None): + """Generate appropriate file names based on context + + IMPORTANT: File naming must support duplicate detection across chapters + """ + if context == 'glossary': + payload_name = f"glossary_payload_{self.conversation_message_count}.json" + response_name = f"glossary_response_{self.conversation_message_count}.txt" + elif context == 'translation': + # Extract chapter info if available - CRITICAL for duplicate detection + content_str = str(messages) + # Remove any rolling summary blocks to avoid picking previous chapter numbers + try: + content_str = re.sub(r"\[Rolling Summary of Chapter \d+\][\s\S]*?\[End of Rolling Summary\]", "", content_str, flags=re.IGNORECASE) + except Exception: + pass + chapter_match = re.search(r'Chapter (\d+)', content_str) + if chapter_match: + chapter_num = chapter_match.group(1) + # Use standard naming that duplicate detection expects + payload_name = f"translation_chapter_{chapter_num}_payload.json" + response_name = f"response_{chapter_num}.html" # This format is expected by duplicate detection + else: + # Check for chunk information + chunk_match = re.search(r'Chunk (\d+)/(\d+)', str(messages)) + if chunk_match: + chunk_num = chunk_match.group(1) + total_chunks = chunk_match.group(2) + # Extract chapter from fuller context + chapter_in_chunk = re.search(r'Chapter (\d+)', str(messages)) + if chapter_in_chunk: + chapter_num = chapter_in_chunk.group(1) + payload_name = f"translation_chapter_{chapter_num}_chunk_{chunk_num}_payload.json" + response_name = f"response_{chapter_num}_chunk_{chunk_num}.html" + else: + payload_name = f"translation_chunk_{chunk_num}_of_{total_chunks}_payload.json" + response_name = f"response_chunk_{chunk_num}_of_{total_chunks}.html" + else: + payload_name = f"translation_payload_{self.conversation_message_count}.json" + response_name = f"response_{self.conversation_message_count}.html" + else: + payload_name = f"{context or 'general'}_payload_{self.conversation_message_count}.json" + response_name = f"{context or 
'general'}_response_{self.conversation_message_count}.txt" + self._last_response_filename = response_name + return payload_name, response_name + + def _save_payload(self, messages, filename, retry_reason=None): + """Save request payload for debugging with retry reason tracking""" + + # Get stable thread directory + thread_dir = self._get_thread_directory() + + # Generate request hash for the filename (to make it unique) + request_hash = self._get_request_hash(messages) + + # Add hash and retry info to filename + base_name, ext = os.path.splitext(filename) + timestamp = datetime.now().strftime("%H%M%S") + + # Include retry reason in filename if provided + if retry_reason: + # Sanitize retry reason for filename + safe_reason = retry_reason.replace(" ", "_").replace("/", "_")[:20] + unique_filename = f"{base_name}_{timestamp}_{safe_reason}_{request_hash[:6]}{ext}" + else: + unique_filename = f"{base_name}_{timestamp}_{request_hash[:6]}{ext}" + + filepath = os.path.join(thread_dir, unique_filename) + + try: + # Thread-safe file writing + with self._file_write_lock: + thread_name = threading.current_thread().name + thread_id = threading.current_thread().ident + + # Extract chapter info for better tracking + chapter_info = self._extract_chapter_info(messages) + + # Include debug info with retry reason + debug_info = { + 'system_prompt_present': any(msg.get('role') == 'system' for msg in messages), + 'system_prompt_length': 0, + 'request_hash': request_hash, + 'thread_name': thread_name, + 'thread_id': thread_id, + 'session_id': self.session_id, + 'chapter_info': chapter_info, + 'timestamp': datetime.now().isoformat(), + 'key_identifier': self.key_identifier, + 'retry_reason': retry_reason, # Track why this payload was saved + 'is_retry': retry_reason is not None + } + + for msg in messages: + if msg.get('role') == 'system': + debug_info['system_prompt_length'] = len(msg.get('content', '')) + break + + # Write the payload + with open(filepath, 'w', encoding='utf-8') as f: + json.dump({ + 'model': getattr(self, 'model', None), + 'client_type': getattr(self, 'client_type', None), + 'messages': messages, + 'timestamp': datetime.now().isoformat(), + 'debug': debug_info, + 'key_identifier': getattr(self, 'key_identifier', None), + 'retry_info': { + 'reason': retry_reason, + 'attempt': getattr(self, '_current_retry_attempt', 0), + 'max_retries': getattr(self, '_max_retries', 7) + } if retry_reason else None + }, f, indent=2, ensure_ascii=False) + + logger.debug(f"[{thread_name}] Saved payload to: {filepath} (reason: {retry_reason or 'initial'})") + + except Exception as e: + print(f"Failed to save payload: {e}") + + + def _save_response(self, content: str, filename: str): + """Save API response with enhanced thread safety and deduplication""" + if not content or not os.getenv("SAVE_PAYLOAD", "1") == "1": + return + + # ONLY save JSON files to Payloads folder + if not filename.endswith('.json'): + logger.debug(f"Skipping HTML response save to Payloads: {filename}") + return + + # Get thread-specific directory + thread_dir = self._get_thread_directory() + thread_id = threading.current_thread().ident + + try: + # Generate content hash for deduplication + content_hash = hashlib.sha256(content.encode()).hexdigest()[:12] + + # Clean up filename + safe_filename = os.path.basename(filename) + base_name, ext = os.path.splitext(safe_filename) + + # Create unique filename with thread ID and content hash + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:19] # Include microseconds + unique_filename = 
f"{base_name}_T{thread_id}_{timestamp}_{content_hash}{ext}" + filepath = os.path.join(thread_dir, unique_filename) + + # Get file-specific lock + file_lock = self._get_file_lock(filepath) + + with file_lock: + # Check if this exact content was already saved (deduplication) + if self._is_duplicate_file(thread_dir, content_hash): + logger.debug(f"Skipping duplicate response save: {content_hash[:8]}") + return + + # Write atomically with temp file + temp_filepath = filepath + '.tmp' + + try: + os.makedirs(thread_dir, exist_ok=True) + + if filename.endswith('.json'): + try: + json_content = json.loads(content) if isinstance(content, str) else content + with open(temp_filepath, 'w', encoding='utf-8') as f: + json.dump(json_content, f, indent=2, ensure_ascii=False) + except json.JSONDecodeError: + with open(temp_filepath, 'w', encoding='utf-8') as f: + f.write(content) + else: + with open(temp_filepath, 'w', encoding='utf-8') as f: + f.write(content) + + # Atomic rename + os.replace(temp_filepath, filepath) + logger.debug(f"Saved response: {filepath}") + + except Exception as e: + if os.path.exists(temp_filepath): + os.remove(temp_filepath) + raise + + except Exception as e: + print(f"Failed to save response: {e}") + + def _get_file_lock(self, filepath: str) -> RLock: + """Get or create a lock for a specific file""" + with self._file_write_locks_lock: + if filepath not in self._file_write_locks: + self._file_write_locks[filepath] = RLock() + return self._file_write_locks[filepath] + + def _is_duplicate_file(self, directory: str, content_hash: str) -> bool: + """Check if a file with this content hash already exists""" + try: + for filename in os.listdir(directory): + if content_hash in filename and filename.endswith('.json'): + return True + except: + pass + return False + + def set_output_filename(self, filename: str): + """Set the actual output filename for truncation logging + + This should be called before sending a request to inform the client + about the actual chapter output filename (e.g., response_001_Chapter_1.html) + + Args: + filename: The actual output filename that will be created in the book folder + """ + self._actual_output_filename = filename + logger.debug(f"Set output filename for truncation logging: {filename}") + + def set_output_directory(self, directory: str): + """Set the output directory for truncation logs + + Args: + directory: The output directory path (e.g., the book folder) + """ + self.output_dir = directory + logger.debug(f"Set output directory: {directory}") + + def cancel_current_operation(self): + """Mark current operation as cancelled + + IMPORTANT: Called by send_with_interrupt when timeout occurs + """ + self._cancelled = True + self._in_cleanup = True # Set cleanup flag correctly + # Show cancellation messages before setting global flag (to avoid circular check) + print("🛑 Operation cancelled (timeout or user stop)") + print("🛑 API operation cancelled") + # Set global cancellation to affect all instances + self.set_global_cancellation(True) + # Suppress httpx logging when cancelled + self._suppress_http_logs() + + def _suppress_http_logs(self): + """Suppress HTTP and API logging during cancellation""" + import logging + # Suppress httpx logs (used by OpenAI client) + httpx_logger = logging.getLogger('httpx') + httpx_logger.setLevel(logging.WARNING) + + # Suppress OpenAI client logs + openai_logger = logging.getLogger('openai') + openai_logger.setLevel(logging.WARNING) + + # Suppress our own API client logs + unified_logger = 
logging.getLogger('unified_api_client') + unified_logger.setLevel(logging.WARNING) + + def _reset_http_logs(self): + """Reset HTTP and API logging levels for new operations""" + import logging + # Reset httpx logs back to INFO + httpx_logger = logging.getLogger('httpx') + httpx_logger.setLevel(logging.INFO) + + # Reset OpenAI client logs back to INFO + openai_logger = logging.getLogger('openai') + openai_logger.setLevel(logging.INFO) + + # Reset our own API client logs back to INFO + unified_logger = logging.getLogger('unified_api_client') + unified_logger.setLevel(logging.INFO) + + def reset_cleanup_state(self): + """Reset cleanup state for new operations""" + self._in_cleanup = False + self._cancelled = False + # Reset global cancellation flag for new operations + self.set_global_cancellation(False) + # Reset logging levels for new operations + self._reset_http_logs() + + def _send_vertex_model_garden(self, messages, temperature=0.7, max_tokens=None, stop_sequences=None, response_name=None): + """Send request to Vertex AI Model Garden models (including Claude)""" + response = None + try: + from google.cloud import aiplatform + from google.oauth2 import service_account + from google.auth.transport.requests import Request + import google.auth.transport.requests + import vertexai + import json + import os + import re + import traceback + import logging + + # Get logger + logger = logging.getLogger(__name__) + + # Import or define UnifiedClientError + try: + # Try to import from the module if it exists + from unified_api_client import UnifiedClientError, UnifiedResponse + except ImportError: + # Define them locally if import fails + class UnifiedClientError(Exception): + def __init__(self, message, error_type=None): + super().__init__(message) + self.error_type = error_type + + from dataclasses import dataclass + @dataclass + class UnifiedResponse: + content: str + usage: dict = None + finish_reason: str = 'stop' + raw_response: object = None + + # Import your global stop check function + try: + from TranslateKRtoEN import is_stop_requested + except ImportError: + # Fallback to checking _cancelled flag + def is_stop_requested(): + return self._cancelled + + # Use the same credentials as Cloud Vision (comes from GUI config) + google_creds_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') + if not google_creds_path: + # Try to get from config + if hasattr(self, 'main_gui') and hasattr(self.main_gui, 'config'): + google_creds_path = self.main_gui.config.get('google_vision_credentials', '') or \ + self.main_gui.config.get('google_cloud_credentials', '') + + if not google_creds_path or not os.path.exists(google_creds_path): + raise ValueError("Google Cloud credentials not found. 
Please set up credentials.") + + # Load credentials with proper scopes + credentials = service_account.Credentials.from_service_account_file( + google_creds_path, + scopes=['https://www.googleapis.com/auth/cloud-platform'] + ) + + # Extract project ID from credentials + with open(google_creds_path, 'r') as f: + creds_data = json.load(f) + project_id = creds_data.get('project_id') + + if not project_id: + raise ValueError("Project ID not found in credentials file") + + logger.info(f"Using project ID: {project_id}") + + # Parse model name + model_name = self.model + if model_name.startswith('vertex_ai/'): + model_name = model_name[10:] # Remove "vertex_ai/" prefix + elif model_name.startswith('vertex/'): + model_name = model_name[7:] # Remove "vertex/" prefix + + logger.info(f"Using model: {model_name}") + + # For Claude models, use the Anthropic SDK with Vertex AI + if 'claude' in model_name.lower(): + # Import Anthropic exceptions + try: + from anthropic import AnthropicVertex + import anthropic + import httpx + except ImportError: + raise UnifiedClientError("Anthropic SDK not installed. Run: pip install anthropic") + + # Use the region from environment variable (which comes from GUI) + region = os.getenv('VERTEX_AI_LOCATION', 'us-east5') + + # CHECK STOP FLAG + if is_stop_requested(): + logger.info("Stop requested, cancelling") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + + # Initialize Anthropic client for Vertex AI + client = AnthropicVertex( + project_id=project_id, + region=region + ) + + # Convert messages to Anthropic format + anthropic_messages = [] + system_prompt = "" + + for msg in messages: + if msg['role'] == 'system': + system_prompt = msg['content'] + else: + anthropic_messages.append({ + "role": msg['role'], + "content": msg['content'] + }) + + # Create message with Anthropic client + kwargs = { + "model": model_name, + "messages": anthropic_messages, + "max_tokens": max_tokens or 4096, + "temperature": temperature, + } + + if system_prompt: + kwargs["system"] = system_prompt + + if stop_sequences: + kwargs["stop_sequences"] = stop_sequences + + # CHECK STOP FLAG BEFORE API CALL + if is_stop_requested(): + logger.info("Stop requested, cancelling API call") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + + try: + message = client.messages.create(**kwargs) + + except httpx.HTTPStatusError as e: + # Handle HTTP status errors from the Anthropic SDK + status_code = e.response.status_code if hasattr(e.response, 'status_code') else 0 + error_body = e.response.text if hasattr(e.response, 'text') else str(e) + + # Check if it's an HTML error page + if '' in error_body or '' in error_str or '' in error_str or ' 200: + raise UnifiedClientError(f"Vertex AI error: Request failed. Check your region and model name.") + else: + raise UnifiedClientError(f"Vertex AI error: {error_str}") + + # CHECK STOP FLAG AFTER RESPONSE + if is_stop_requested(): + logger.info("Stop requested after response, discarding result") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + # Success! 
Convert response to UnifiedResponse + print(f"Successfully got response from {region}") + return UnifiedResponse( + content=message.content[0].text if message.content else "", + usage={ + "input_tokens": message.usage.input_tokens, + "output_tokens": message.usage.output_tokens, + "total_tokens": message.usage.input_tokens + message.usage.output_tokens + } if hasattr(message, 'usage') else None, + finish_reason=message.stop_reason if hasattr(message, 'stop_reason') else 'stop', + raw_response=message + ) + + else: + # For Gemini models on Vertex AI, we need to use Vertex AI SDK + location = os.getenv('VERTEX_AI_LOCATION', 'us-east5') + + # Check stop flag before Gemini call + if is_stop_requested(): + logger.info("Stop requested, cancelling Vertex AI Gemini request") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + # Initialize Vertex AI + vertexai.init(project=project_id, location=location, credentials=credentials) + + # Import GenerativeModel from vertexai + from vertexai.generative_models import GenerativeModel, GenerationConfig, HarmCategory, HarmBlockThreshold + + # Create model instance + vertex_model = GenerativeModel(model_name) + + # Format messages for Vertex AI Gemini using existing formatter + formatted_prompt = self._format_prompt(messages, style='gemini') + + # Check if safety settings are disabled via config (from GUI) + disable_safety = os.getenv("DISABLE_GEMINI_SAFETY", "false").lower() == "true" + + # Get thinking budget from environment (though Vertex AI may not support it) + thinking_budget = int(os.getenv("THINKING_BUDGET", "-1")) + enable_thinking = os.getenv("ENABLE_GEMINI_THINKING", "0") == "1" + + # Log configuration + print(f"\n🔧 Vertex AI Gemini Configuration:") + print(f" Model: {model_name}") + print(f" Region: {location}") + print(f" Project: {project_id}") + + # Configure generation parameters using passed parameters + generation_config_dict = { + "temperature": temperature, + "max_output_tokens": max_tokens or 8192, + } + + # Add user-configured anti-duplicate parameters if enabled + if os.getenv("ENABLE_ANTI_DUPLICATE", "0") == "1": + # Get all anti-duplicate parameters from environment + if os.getenv("TOP_P"): + top_p = float(os.getenv("TOP_P", "1.0")) + if top_p < 1.0: # Only add if not default + generation_config_dict["top_p"] = top_p + + if os.getenv("TOP_K"): + top_k = int(os.getenv("TOP_K", "0")) + if top_k > 0: # Only add if not default + generation_config_dict["top_k"] = top_k + + # Note: Vertex AI Gemini may not support all parameters like frequency_penalty + # Add only supported parameters + if os.getenv("CANDIDATE_COUNT"): + candidate_count = int(os.getenv("CANDIDATE_COUNT", "1")) + if candidate_count > 1: + generation_config_dict["candidate_count"] = candidate_count + + # Add custom stop sequences if provided + custom_stops = os.getenv("CUSTOM_STOP_SEQUENCES", "").strip() + if custom_stops: + additional_stops = [s.strip() for s in custom_stops.split(",") if s.strip()] + if stop_sequences: + stop_sequences.extend(additional_stops) + else: + stop_sequences = additional_stops + + if stop_sequences: + generation_config_dict["stop_sequences"] = stop_sequences + + # Create generation config + generation_config = GenerationConfig(**generation_config_dict) + + # Configure safety settings based on GUI toggle + safety_settings = None + if disable_safety: + # Import SafetySetting from vertexai + from vertexai.generative_models import SafetySetting + + # Create list of SafetySetting objects (same format as regular 
Gemini) + safety_settings = [ + SafetySetting( + category=HarmCategory.HARM_CATEGORY_HATE_SPEECH, + threshold=HarmBlockThreshold.BLOCK_NONE + ), + SafetySetting( + category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold=HarmBlockThreshold.BLOCK_NONE + ), + SafetySetting( + category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, + threshold=HarmBlockThreshold.BLOCK_NONE + ), + SafetySetting( + category=HarmCategory.HARM_CATEGORY_HARASSMENT, + threshold=HarmBlockThreshold.BLOCK_NONE + ), + SafetySetting( + category=HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY, + threshold=HarmBlockThreshold.BLOCK_NONE + ), + ] + # Only log if not stopping + if not self._is_stop_requested(): + print(f"🔒 Vertex AI Gemini Safety Status: DISABLED - All categories set to BLOCK_NONE") + else: + # Only log if not stopping + if not self._is_stop_requested(): + print(f"🔒 Vertex AI Gemini Safety Status: ENABLED - Using default Gemini safety settings") + + # SAVE SAFETY CONFIGURATION FOR VERIFICATION + if safety_settings: + safety_status = "DISABLED - All categories set to BLOCK_NONE" + readable_safety = { + "HATE_SPEECH": "BLOCK_NONE", + "SEXUALLY_EXPLICIT": "BLOCK_NONE", + "HARASSMENT": "BLOCK_NONE", + "DANGEROUS_CONTENT": "BLOCK_NONE", + "CIVIC_INTEGRITY": "BLOCK_NONE" + } + else: + safety_status = "ENABLED - Using default Gemini safety settings" + readable_safety = "DEFAULT" + + # Save configuration to file + config_data = { + "type": "VERTEX_AI_GEMINI_REQUEST", + "model": model_name, + "project_id": project_id, + "location": location, + "safety_enabled": not disable_safety, + "safety_settings": readable_safety, + "temperature": temperature, + "max_output_tokens": max_tokens or 8192, + "timestamp": datetime.now().isoformat(), + } + + # Save configuration to file with thread isolation + self._save_gemini_safety_config(config_data, response_name) + + # Retry logic + attempts = self._get_max_retries() + attempt = 0 + result_text = "" + + while attempt < attempts and not result_text: + try: + # Update max_output_tokens for this attempt + generation_config_dict["max_output_tokens"] = max_tokens or 8192 + generation_config = GenerationConfig(**generation_config_dict) + + # Only log if not stopping + if not self._is_stop_requested(): + print(f" 📊 Temperature: {temperature}, Max tokens: {max_tokens or 8192}") + + # Generate content with optional safety settings + if safety_settings: + response = vertex_model.generate_content( + formatted_prompt, + generation_config=generation_config, + safety_settings=safety_settings + ) + else: + response = vertex_model.generate_content( + formatted_prompt, + generation_config=generation_config + ) + + # Extract text from response + if response.candidates: + for candidate in response.candidates: + if candidate.content and candidate.content.parts: + for part in candidate.content.parts: + if hasattr(part, 'text'): + result_text += part.text + + # Check if we got content + if result_text and result_text.strip(): + break + else: + raise Exception("Empty response from Vertex AI") + + except Exception as e: + print(f"Vertex AI Gemini attempt {attempt+1} failed: {e}") + + # Check for quota errors + error_str = str(e) + if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str: + raise UnifiedClientError( + f"Quota exceeded for Vertex AI Gemini model: {model_name}\n\n" + "Request quota increase in Google Cloud Console." 
+ ) + elif "404" in error_str or "NOT_FOUND" in error_str: + raise UnifiedClientError( + f"Model {model_name} not found in region {location}.\n\n" + "Available Gemini models on Vertex AI:\n" + "• gemini-1.5-flash-002\n" + "• gemini-1.5-pro-002\n" + "• gemini-1.0-pro-002" + ) + + # No automatic retry - let higher level handle retries + #attempt += 1 + #if attempt < attempts: + # print(f"❌ Gemini attempt {attempt} failed, no automatic retry") + # break # Exit the retry loop + + # Check stop flag after response + if is_stop_requested(): + logger.info("Stop requested after Vertex AI Gemini response") + raise UnifiedClientError("Operation cancelled by user", error_type="cancelled") + + if not result_text: + raise UnifiedClientError("All Vertex AI Gemini attempts failed to produce content") + + return UnifiedResponse( + content=result_text, + finish_reason='stop', + raw_response=response + ) + + except UnifiedClientError: + # Re-raise our own errors without modification + raise + except Exception as e: + # Handle any other unexpected errors + error_str = str(e) + # Don't print HTML errors + if '' not in error_str and ' int: + """Parse Retry-After header (seconds or HTTP-date) into seconds.""" + if not value: + return 0 + value = value.strip() + if value.isdigit(): + try: + return max(0, int(value)) + except Exception: + return 0 + try: + import email.utils + dt = email.utils.parsedate_to_datetime(value) + if dt is None: + return 0 + now = datetime.now(dt.tzinfo) + secs = int((dt - now).total_seconds()) + return max(0, secs) + except Exception: + return 0 + + def _get_session(self, base_url: str): + """Get or create a thread-local requests.Session for a base_url with connection pooling.""" + tls = self._get_thread_local_client() + if not hasattr(tls, "session_map"): + tls.session_map = {} + session = tls.session_map.get(base_url) + if session is None: + session = requests.Session() + try: + adapter = HTTPAdapter( + pool_connections=int(os.getenv("HTTP_POOL_CONNECTIONS", "20")), + pool_maxsize=int(os.getenv("HTTP_POOL_MAXSIZE", "50")), + max_retries=Retry(total=0) if Retry is not None else 0 + ) + except Exception: + adapter = HTTPAdapter( + pool_connections=int(os.getenv("HTTP_POOL_CONNECTIONS", "20")), + pool_maxsize=int(os.getenv("HTTP_POOL_MAXSIZE", "50")) + ) + session.mount("http://", adapter) + session.mount("https://", adapter) + tls.session_map[base_url] = session + return session + + def _http_request_with_retries(self, method: str, url: str, headers: dict = None, json: dict = None, + expected_status: tuple = (200,), max_retries: int = 3, + provider_name: str = None, use_session: bool = False): + """ + Generic HTTP requester with standardized retry behavior. + - Handles cancellation, rate limits (429 with Retry-After), 5xx with backoff, and generic errors. + - Returns the requests.Response object when a successful status is received. 
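+ 
+         Illustrative usage (the endpoint, key, and payload below are hypothetical
+         placeholders; nothing here is called elsewhere in this module, it only
+         shows how the parameters fit together):
+ 
+             resp = self._http_request_with_retries(
+                 "POST",
+                 "https://api.example.com/v1/chat/completions",
+                 headers={"Authorization": "Bearer <api key>", "Content-Type": "application/json"},
+                 json={"model": "example-model", "messages": [{"role": "user", "content": "hi"}]},
+                 expected_status=(200,),
+                 max_retries=3,
+                 provider_name="ExampleProvider",
+                 use_session=True,
+             )
+             data = resp.json()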
+ """ + api_delay = self._get_send_interval() + provider = provider_name or "HTTP" + for attempt in range(max_retries): + if self._cancelled: + raise UnifiedClientError("Operation cancelled") + + # Toggle to ignore server-provided Retry-After headers + ignore_retry_after = (os.getenv("ENABLE_HTTP_TUNING", "0") == "1") and (os.getenv("IGNORE_RETRY_AFTER", "0") == "1") + try: + if use_session: + # Reuse pooled session based on the base URL + try: + from urllib.parse import urlsplit + except Exception: + urlsplit = None + base_for_session = None + if urlsplit is not None: + parts = urlsplit(url) + base_for_session = f"{parts.scheme}://{parts.netloc}" if parts.scheme and parts.netloc else None + session = self._get_session(base_for_session) if base_for_session else requests + timeout = self._get_timeouts() if base_for_session else self.request_timeout + resp = session.request(method, url, headers=headers, json=json, timeout=timeout) + else: + resp = requests.request(method, url, headers=headers, json=json, timeout=self.request_timeout) + except requests.RequestException as e: + if attempt < max_retries - 1: + print(f"{provider} network error (attempt {attempt + 1}): {e}") + time.sleep(api_delay) + continue + raise UnifiedClientError(f"{provider} network error: {e}") + + status = resp.status_code + if status in expected_status: + return resp + + # Rate limit handling (429) + if status == 429: + # Print detailed 429 info (match SDK-level detail) + try: + ct = (resp.headers.get('content-type') or '').lower() + retry_after_val = resp.headers.get('Retry-After', '') + rl_remaining = resp.headers.get('X-RateLimit-Remaining') or resp.headers.get('x-ratelimit-remaining') + rl_limit = resp.headers.get('X-RateLimit-Limit') or resp.headers.get('x-ratelimit-limit') + rl_reset = resp.headers.get('X-RateLimit-Reset') or resp.headers.get('x-ratelimit-reset') + detail_msg = None + if 'application/json' in ct: + try: + body = resp.json() + if isinstance(body, dict): + err = body.get('error') or {} + detail_msg = err.get('message') or body.get('message') or None + err_code = err.get('code') or body.get('code') or None + if detail_msg: + print(f"{provider} 429: {detail_msg} | code: {err_code} | retry-after: {retry_after_val} | remaining: {rl_remaining} reset: {rl_reset} limit: {rl_limit}") + else: + print(f"{provider} 429: {resp.text[:1200]} | retry-after: {retry_after_val} | remaining: {rl_remaining} reset: {rl_reset} limit: {rl_limit}") + except Exception: + print(f"{provider} 429 (non-JSON parse): ct={ct} retry-after: {retry_after_val} | remaining: {rl_remaining} reset: {rl_reset} limit: {rl_limit}") + else: + print(f"{provider} 429: ct={ct} retry-after: {retry_after_val} | remaining: {rl_remaining} reset: {rl_reset} limit: {rl_limit}") + except Exception: + pass + + # Check if indefinite rate limit retry is enabled + indefinite_retry_enabled = os.getenv("INDEFINITE_RATE_LIMIT_RETRY", "1") == "1" + + retry_after_val = resp.headers.get('Retry-After', '') + retry_secs = self._parse_retry_after(retry_after_val) + if ignore_retry_after: + wait_time = api_delay * 10 + else: + wait_time = retry_secs if retry_secs > 0 else api_delay * 10 + + # Add jitter and cap wait time + wait_time = min(wait_time + random.uniform(1, 5), 300) # Max 5 minutes + + if indefinite_retry_enabled: + # For indefinite retry, don't count against max_retries + print(f"{provider} rate limit ({status}), indefinite retry enabled - waiting {wait_time:.1f}s") + waited = 0.0 + while waited < wait_time: + if self._cancelled: + raise 
UnifiedClientError("Operation cancelled", error_type="cancelled") + time.sleep(0.5) + waited += 0.5 + # Don't increment attempt counter for rate limits when indefinite retry is enabled + attempt = max(0, attempt - 1) + continue + elif attempt < max_retries - 1: + # Standard retry behavior when indefinite retry is disabled + print(f"{provider} rate limit ({status}), waiting {wait_time:.1f}s (attempt {attempt + 1}/{max_retries})") + waited = 0.0 + while waited < wait_time: + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + time.sleep(0.5) + waited += 0.5 + continue + + # If we reach here, indefinite retry is disabled and we've exhausted max_retries + raise UnifiedClientError(f"{provider} rate limit: {resp.text}", error_type="rate_limit", http_status=429) + + # Transient server errors with optional Retry-After + if status in (500, 502, 503, 504) and attempt < max_retries - 1: + retry_after_val = resp.headers.get('Retry-After', '') + retry_secs = self._parse_retry_after(retry_after_val) + if ignore_retry_after: + retry_secs = 0 + if retry_secs: + sleep_for = retry_secs + random.uniform(0, 1) + else: + base_delay = 5.0 + sleep_for = min(base_delay * (2 ** attempt) + random.uniform(0, 1), 60.0) + print(f"{provider} {status} - retrying in {sleep_for:.1f}s (attempt {attempt + 1}/{max_retries})") + waited = 0.0 + while waited < sleep_for: + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + time.sleep(0.5) + waited += 0.5 + continue + + # Other non-success statuses + if attempt < max_retries - 1: + print(f"{provider} API error: {status} - {resp.text} (attempt {attempt + 1})") + time.sleep(api_delay) + continue + raise UnifiedClientError(f"{provider} API error: {status} - {resp.text}", http_status=status) + + def _extract_openai_json(self, json_resp: dict): + """Extract content, finish_reason, and usage from OpenAI-compatible JSON.""" + content = "" + finish_reason = 'stop' + choices = json_resp.get('choices', []) + if choices: + choice = choices[0] + finish_reason = choice.get('finish_reason') or 'stop' + message = choice.get('message') + if isinstance(message, dict): + content = message.get('content') or message.get('text') or "" + elif isinstance(message, str): + content = message + else: + # As a fallback, try 'text' field directly on choice + content = choice.get('text', "") + # Normalize finish reasons + if finish_reason in ['max_tokens', 'max_length']: + finish_reason = 'length' + usage = None + if 'usage' in json_resp: + u = json_resp['usage'] or {} + pt = u.get('prompt_tokens', 0) + ct = u.get('completion_tokens', 0) + tt = u.get('total_tokens', pt + ct) + usage = {'prompt_tokens': pt, 'completion_tokens': ct, 'total_tokens': tt} + return content, finish_reason, usage + + def _with_sdk_retries(self, provider_name: str, max_retries: int, call): + """Run an SDK call with standardized retry behavior and error wrapping.""" + api_delay = self._get_send_interval() + for attempt in range(max_retries): + try: + if self._cancelled: + raise UnifiedClientError("Operation cancelled", error_type="cancelled") + return call() + except UnifiedClientError: + # Already normalized; propagate + raise + except Exception as e: + # Suppress noise if we are stopping/cleaning up or the SDK surfaced a cancellation + err_str = str(e) + is_cancel = getattr(self, '_cancelled', False) or ('cancelled' in err_str.lower()) or ('canceled' in err_str.lower()) + if is_cancel: + # Normalize and stop retry/printing + raise 
UnifiedClientError("Operation cancelled", error_type="cancelled") + if attempt < max_retries - 1: + self._debug_log(f"{provider_name} SDK error (attempt {attempt + 1}): {e}") + time.sleep(api_delay) + continue + self._debug_log(f"{provider_name} SDK error after all retries: {e}") + raise UnifiedClientError(f"{provider_name} SDK error: {e}") + + def _build_openai_headers(self, provider: str, api_key: str, headers: Optional[dict]) -> dict: + """Construct standard headers for OpenAI-compatible HTTP calls without altering behavior.""" + h = dict(headers) if headers else {} + # Only set Authorization if not already present and not Azure special-case (Azure handled earlier) + if 'Authorization' not in h and provider not in ('azure',): + h['Authorization'] = f'Bearer {api_key}' + # Ensure content type + if 'Content-Type' not in h: + h['Content-Type'] = 'application/json' + # Ensure we explicitly request JSON back from providers + if 'Accept' not in h: + h['Accept'] = 'application/json' + return h + + def _apply_openai_safety(self, provider: str, disable_safety: bool, payload: dict, headers: dict): + """Apply safety flags for providers that support them (avoid unsupported params).""" + if not disable_safety: + return + # Do NOT send 'moderation' to OpenAI; it's unsupported and causes 400 Unknown parameter + if provider in ["groq", "fireworks"]: + payload["moderation"] = False + elif provider == "poe": + payload["safe_mode"] = False + elif provider == "openrouter": + headers['X-Safe-Mode'] = 'false' + + def _build_anthropic_payload(self, formatted_messages: list, temperature: float, max_tokens: int, anti_dupe_params: dict, system_message: Optional[str] = None) -> dict: + data = { + "model": self.model, + "messages": formatted_messages, + "temperature": temperature, + "max_tokens": max_tokens, + **(anti_dupe_params or {}) + } + if system_message: + data["system"] = system_message + return data + + def _parse_anthropic_json(self, json_resp: dict): + content_parts = json_resp.get("content", []) + if isinstance(content_parts, list): + content = "".join(part.get("text", "") for part in content_parts) + else: + content = str(content_parts) + finish_reason = json_resp.get("stop_reason") + if finish_reason == "max_tokens": + finish_reason = "length" + elif finish_reason == "stop_sequence": + finish_reason = "stop" + usage = json_resp.get("usage") + if usage: + usage = { + 'prompt_tokens': usage.get('input_tokens', 0), + 'completion_tokens': usage.get('output_tokens', 0), + 'total_tokens': usage.get('input_tokens', 0) + usage.get('output_tokens', 0) + } + else: + usage = None + return content, finish_reason, usage + + def _get_idempotency_key(self) -> str: + """Build an idempotency key from the current request context.""" + tls = self._get_thread_local_client() + req_id = getattr(tls, "idem_request_id", None) or uuid.uuid4().hex[:8] + attempt = getattr(tls, "idem_attempt", 0) + return f"{req_id}-a{attempt}" + + def _get_openai_client(self, base_url: str, api_key: str): + """Get or create a thread-local OpenAI client for a base_url.""" + if openai is None: + raise UnifiedClientError("OpenAI SDK not installed. 
Install with: pip install openai") + + # CRITICAL: If individual endpoint is applied, use our existing client instead of creating new one + if (hasattr(self, '_individual_endpoint_applied') and self._individual_endpoint_applied and + hasattr(self, 'openai_client') and self.openai_client): + return self.openai_client + + tls = self._get_thread_local_client() + if not hasattr(tls, "openai_clients"): + tls.openai_clients = {} + map_key = f"{base_url}|{bool(api_key)}" + client = tls.openai_clients.get(map_key) + if client is None: + timeout_obj = None + try: + if httpx is not None: + connect, read = self._get_timeouts() + timeout_obj = httpx.Timeout(connect=connect, read=read, write=read, pool=connect) + else: + # Fallback: use read timeout as a single float + _, read = self._get_timeouts() + timeout_obj = float(read) + except Exception: + _, read = self._get_timeouts() + timeout_obj = float(read) + client = openai.OpenAI( + api_key=api_key, + base_url=base_url, + timeout=timeout_obj + ) + tls.openai_clients[map_key] = client + return client + + def _get_response(self, messages, temperature, max_tokens, max_completion_tokens, response_name) -> UnifiedResponse: + """ + Route to appropriate AI provider and get response + + Args: + messages: List of message dicts + temperature: Sampling temperature + max_tokens: Maximum tokens (for non-o series models) + max_completion_tokens: Maximum completion tokens (for o-series models) + response_name: Name for saving response + """ + self._apply_api_call_stagger() + # Ensure client_type is initialized before routing (important for multi-key mode) + try: + if not hasattr(self, 'client_type') or self.client_type is None: + self._ensure_thread_client() + except Exception: + # Guard against missing attribute in extreme early paths + if not hasattr(self, 'client_type'): + self.client_type = None + + # FIX: Ensure max_tokens has a value before passing to handlers + if max_tokens is None and max_completion_tokens is None: + # Use instance default or standard default + max_tokens = getattr(self, 'max_tokens', 8192) + elif max_tokens is None and max_completion_tokens is not None: + # For o-series models, use max_completion_tokens as fallback + max_tokens = max_completion_tokens + # Check if this is actually Gemini (including when using OpenAI endpoint) + actual_provider = self._get_actual_provider() + + # Detect if this is an image request (messages contain image parts) + has_images = False + for _m in messages: + c = _m.get('content') + if isinstance(c, list) and any(isinstance(p, dict) and p.get('type') == 'image_url' for p in c): + has_images = True + break + + # If image request, route to image handlers only for providers that require it + if has_images: + img_b64 = self._extract_first_image_base64(messages) + if actual_provider == 'gemini': + return self._send_gemini(messages, temperature, max_tokens or max_completion_tokens, response_name, image_base64=img_b64) + if actual_provider == 'anthropic': + return self._send_anthropic_image(messages, img_b64, temperature, max_tokens or max_completion_tokens, response_name) + if actual_provider == 'vertex_model_garden': + return self._send_vertex_model_garden_image(messages, img_b64, temperature, max_tokens or max_completion_tokens, response_name) + if actual_provider == 'poe': + return self._send_poe_image(messages, img_b64, temperature, max_tokens or max_completion_tokens, response_name) + # Otherwise fall through to default handler below (OpenAI-compatible providers handle images in messages) + + # Map client types 
to their handler methods + handlers = { + 'openai': self._send_openai, + 'gemini': self._send_gemini, + 'deepseek': self._send_openai_provider_router, # Consolidated + 'anthropic': self._send_anthropic, + 'mistral': self._send_mistral, + 'cohere': self._send_cohere, + 'chutes': self._send_openai_provider_router, # Consolidated + 'ai21': self._send_ai21, + 'together': self._send_openai_provider_router, # Already in router + 'perplexity': self._send_openai_provider_router, # Consolidated + 'replicate': self._send_replicate, + 'yi': self._send_openai_provider_router, + 'qwen': self._send_openai_provider_router, + 'baichuan': self._send_openai_provider_router, + 'zhipu': self._send_openai_provider_router, + 'moonshot': self._send_openai_provider_router, + 'groq': self._send_openai_provider_router, + 'baidu': self._send_openai_provider_router, + 'tencent': self._send_openai_provider_router, + 'iflytek': self._send_openai_provider_router, + 'bytedance': self._send_openai_provider_router, + 'minimax': self._send_openai_provider_router, + 'sensenova': self._send_openai_provider_router, + 'internlm': self._send_openai_provider_router, + 'tii': self._send_openai_provider_router, + 'microsoft': self._send_openai_provider_router, + 'azure': self._send_azure, + 'google': self._send_google_palm, + 'alephalpha': self._send_alephalpha, + 'databricks': self._send_openai_provider_router, + 'huggingface': self._send_huggingface, + 'openrouter': self._send_openai_provider_router, # OpenRouter aggregator + 'poe': self._send_poe, # POE platform (restored) + 'electronhub': self._send_electronhub, # ElectronHub aggregator (restored) + 'fireworks': self._send_openai_provider_router, + 'xai': self._send_openai_provider_router, # xAI Grok models + 'salesforce': self._send_openai_provider_router, # Consolidated + 'vertex_model_garden': self._send_vertex_model_garden, + 'deepl': self._send_deepl, # DeepL translation service + 'google_translate': self._send_google_translate, # Google Cloud Translate + } + + # IMPORTANT: Use actual_provider for routing, not client_type + # This ensures Gemini always uses its native handler even when using OpenAI endpoint + handler = handlers.get(actual_provider) + + if not handler: + # Fallback to client_type if no actual_provider match + handler = handlers.get(self.client_type) + + if not handler: + # Try fallback to Together AI for open models + if self.client_type in ['bigscience', 'meta']: + logger.info(f"Using Together AI for {self.client_type} model") + return self._send_openai_provider_router(messages, temperature, max_tokens, response_name) + raise UnifiedClientError(f"No handler for client type: {getattr(self, 'client_type', 'unknown')}") + + if self.client_type in ['deepl', 'google_translate']: + # These services don't use temperature or token limits + # They just translate the text directly + return handler(messages, None, None, response_name) + + # Route based on actual provider (handles Gemini with OpenAI endpoint correctly) + elif actual_provider == 'gemini': + # Always use Gemini handler for Gemini models, regardless of transport + logger.debug(f"Routing to Gemini handler (actual provider: {actual_provider}, client_type: {self.client_type})") + return self._send_gemini(messages, temperature, max_tokens, response_name) + elif actual_provider == 'openai' or self.client_type == 'openai': + # For OpenAI, pass the max_completion_tokens parameter + return handler(messages, temperature, max_tokens, max_completion_tokens, response_name) + elif self.client_type == 
'vertex_model_garden': + # Vertex AI doesn't use response_name parameter + return handler(messages, temperature, max_tokens or max_completion_tokens, None, response_name) + else: + # Other providers don't use max_completion_tokens + return handler(messages, temperature, max_tokens, response_name) + + + def _get_actual_provider(self) -> str: + """ + Get the actual provider name, accounting for Gemini using OpenAI endpoint. + This is used for proper routing and detection. + """ + # Check if this is Gemini using OpenAI endpoint + if hasattr(self, '_original_client_type') and self._original_client_type: + return self._original_client_type + return getattr(self, 'client_type', 'openai') + + def _extract_chapter_label(self, messages) -> str: + """Extract a concise chapter/chunk label from messages for logging.""" + try: + s = str(messages) + import re + chap = None + m = re.search(r'Chapter\s+(\d+)', s) + if m: + chap = f"Chapter {m.group(1)}" + chunk = None + mc = re.search(r'Chunk\s+(\d+)/(\d+)', s) + if mc: + chunk = f"{mc.group(1)}/{mc.group(2)}" + if chap and chunk: + return f"{chap} (chunk {chunk})" + if chap: + return chap + if chunk: + return f"chunk {chunk}" + except Exception: + pass + return "request" + + def _log_pre_stagger(self, messages, context: Optional[str] = None) -> None: + """Emit a pre-stagger log line so users see what's being sent before delay.""" + try: + thread_name = threading.current_thread().name + label = self._extract_chapter_label(messages) + ctx = context or 'translation' + print(f"📤 [{thread_name}] Sending {label} ({ctx}) — queuing staggered API call...") + # Stash label so stagger logger can show what is being translated + try: + tls = self._get_thread_local_client() + tls.current_request_label = label + except Exception: + pass + except Exception: + # Never block on logging + pass + + def _is_gemini_request(self) -> bool: + """ + Check if this is a Gemini request (native or via OpenAI endpoint) + """ + return self._get_actual_provider() == 'gemini' + + def _is_stop_requested(self) -> bool: + """ + Check if stop was requested by checking global flag, local cancelled flag, and class-level cancellation + """ + # Check class-level global cancellation first + if self.is_globally_cancelled(): + return True + + # Check local cancelled flag (more reliable in threading context) + if getattr(self, '_cancelled', False): + return True + + try: + # Import the stop check function from the main translation module + from TransateKRtoEN import is_stop_requested + return is_stop_requested() + except ImportError: + # Fallback if import fails + return False + + def _get_anti_duplicate_params(self, temperature): + """Get user-configured anti-duplicate parameters from GUI settings""" + # Check if user enabled anti-duplicate + if os.getenv("ENABLE_ANTI_DUPLICATE", "0") != "1": + return {} + + # Get user's exact values from GUI (via environment variables) + top_p = float(os.getenv("TOP_P", "1.0")) + top_k = int(os.getenv("TOP_K", "0")) + frequency_penalty = float(os.getenv("FREQUENCY_PENALTY", "0.0")) + presence_penalty = float(os.getenv("PRESENCE_PENALTY", "0.0")) + + # Apply parameters based on provider capabilities + params = {} + + if self.client_type in ['openai', 'deepseek', 'groq', 'electronhub', 'openrouter']: + # OpenAI-compatible providers + if frequency_penalty > 0: + params["frequency_penalty"] = frequency_penalty + if presence_penalty > 0: + params["presence_penalty"] = presence_penalty + if top_p < 1.0: + params["top_p"] = top_p + + elif self.client_type == 'gemini': + 
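+ # The Gemini branch below follows the same pattern as the OpenAI-compatible branch
+ # above: read the GUI-provided environment values and forward only the knobs that
+ # differ from their defaults, so providers never receive redundant sampling
+ # parameters. A condensed, standalone sketch of that env-to-params mapping (the
+ # function name is an assumption, not something defined in this file):
+ #
+ #     import os
+ #
+ #     def sampling_overrides():
+ #         """Collect only the sampling parameters the user actually changed."""
+ #         params = {}
+ #         top_p = float(os.getenv("TOP_P", "1.0"))
+ #         top_k = int(os.getenv("TOP_K", "0"))
+ #         if top_p < 1.0:
+ #             params["top_p"] = top_p
+ #         if top_k > 0:
+ #             params["top_k"] = top_k
+ #         return params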
# Gemini supports both top_p and top_k + if top_p < 1.0: + params["top_p"] = top_p + if top_k > 0: + params["top_k"] = top_k + + elif self.client_type == 'anthropic': + # Claude supports top_p and top_k + if top_p < 1.0: + params["top_p"] = top_p + if top_k > 0: + params["top_k"] = top_k + + # Log applied parameters + if params: + logger.info(f"Applying anti-duplicate params for {self.client_type}: {list(params.keys())}") + + return params + + def _detect_silent_truncation(self, content: str, messages: List[Dict], context: str = None) -> bool: + """ + Detect silent truncation where APIs (especially ElectronHub) cut off content + without setting proper finish_reason. + + Common patterns: + - Sentences ending abruptly without punctuation + - Content significantly shorter than expected + - Missing closing tags in structured content + - Sudden topic changes or incomplete thoughts + """ + if not content: + return False + + content_stripped = content.strip() + if not content_stripped: + return False + + # Pattern 1: Check for incomplete sentence endings (with improved logic) + # Skip this check for code contexts, JSON, or when content contains code blocks + if context not in ['code', 'json', 'data', 'list', 'python', 'javascript', 'programming']: + # Also skip if content appears to contain code + if '```' in content or 'def ' in content or 'class ' in content or 'import ' in content or 'function ' in content: + pass # Skip punctuation check for code content + else: + last_char = content_stripped[-1] + # Valid endings for PROSE/NARRATIVE text only + # Removed quotes since they're common in code + valid_endings = [ + ".", "!", "?", "»", "】", ")", ")", + "。", "!", "?", ":", ";", "]", "}", + "…", "—", "–", "*", "/", ">", "~", "%" + ] + + # Check if ends with incomplete sentence (no proper punctuation) + if last_char not in valid_endings: + # Look at the last few characters for better context + last_segment = content_stripped[-50:] if len(content_stripped) > 50 else content_stripped + + # Check for common false positive patterns + false_positive_patterns = [ + # Lists or enumerations often don't end with punctuation + r'\n\s*[-•*]\s*[^.!?]+$', # Bullet points + r'\n\s*\d+\)\s*[^.!?]+$', # Numbered lists + r'\n\s*[a-z]\)\s*[^.!?]+$', # Letter lists + # Code or technical content + r'```[^`]*$', # Inside code block + r'\$[^$]+$', # Math expressions + # Single words or short phrases (likely labels/headers) + r'^\w+$', # Single word + r'^[\w\s]{1,15}$', # Very short content + ] + + import re + is_false_positive = any(re.search(pattern, last_segment) for pattern in false_positive_patterns) + + if not is_false_positive: + # Additional check: is the last word incomplete? + words = content_stripped.split() + if words and len(words) > 3: # Only check if we have enough content + last_word = words[-1] + # Check for common incomplete patterns + # But exclude common abbreviations + common_abbreviations = {'etc', 'vs', 'eg', 'ie', 'vol', 'no', 'pg', 'ch', 'pt'} + if (len(last_word) > 2 and + last_word[-1].isalpha() and + last_word.lower() not in common_abbreviations and + not last_word.isupper()): # Exclude acronyms + + # Final check: does it look like mid-sentence? 
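+                                    # Illustrative trigger (hypothetical text): a chunk ending in
+                                    # "...and then she reached for the" lands here, since "the" is
+                                    # alphabetic, longer than two characters, and not an abbreviation or acronym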
+ # Look for sentence starters before the last segment + preceding_text = ' '.join(words[-10:-1]) if len(words) > 10 else ' '.join(words[:-1]) + sentence_starters = ['the', 'a', 'an', 'and', 'but', 'or', 'so', 'because', 'when', 'if', 'that'] + + # Check if we're likely mid-sentence + if any(starter in preceding_text.lower().split() for starter in sentence_starters): + print(f"Possible silent truncation detected: incomplete sentence ending") + return True + + # Pattern 2: Check for significantly short responses (with improved thresholds) + if context == 'translation': + # Calculate input length more accurately + input_content = [] + for msg in messages: + if msg.get('role') == 'user': + msg_content = msg.get('content', '') + # Handle both string and list content formats + if isinstance(msg_content, list): + for item in msg_content: + if isinstance(item, dict) and item.get('type') == 'text': + input_content.append(item.get('text', '')) + else: + input_content.append(msg_content) + + input_length = sum(len(text) for text in input_content) + + # Adjusted threshold - translations can legitimately be shorter + # Only flag if output is less than 20% of input AND input is substantial + if input_length > 1000 and len(content_stripped) < input_length * 0.2: + # Additional check: does the content seem complete? + if not content_stripped.endswith(('.', '!', '?', '"', "'", '。', '!', '?')): + print(f"Possible silent truncation: output ({len(content_stripped)} chars) much shorter than input ({input_length} chars)") + return True + + # Pattern 3: Check for incomplete HTML/XML structures (improved) + if '<' in content and '>' in content: + # More sophisticated tag matching + import re + + # Find all opening tags (excluding self-closing) + opening_tags = re.findall(r'<([a-zA-Z][^/>]*?)(?:\s[^>]*)?>',content) + closing_tags = re.findall(r'([a-zA-Z][^>]*?)>', content) + self_closing = re.findall(r'<[^>]*?/>', content) + + # Count tag mismatches + from collections import Counter + open_counts = Counter(opening_tags) + close_counts = Counter(closing_tags) + + # Check for significant mismatches + unclosed_tags = [] + for tag, count in open_counts.items(): + # Ignore void elements that don't need closing + void_elements = {'br', 'hr', 'img', 'input', 'meta', 'area', 'base', 'col', 'embed', 'link', 'param', 'source', 'track', 'wbr'} + if tag.lower() not in void_elements: + close_count = close_counts.get(tag, 0) + if count > close_count + 1: # Allow 1 tag mismatch + unclosed_tags.append(tag) + + if len(unclosed_tags) > 2: # Multiple unclosed tags indicate truncation + print(f"Possible silent truncation: unclosed HTML tags detected: {unclosed_tags}") + return True + + # Pattern 4: Check for mature content indicators (reduced false positives) + # Only check if the content is suspiciously short + if len(content_stripped) < 200: + mature_indicators = [ + 'cannot provide explicit', 'cannot generate adult', + 'unable to create sexual', 'cannot assist with mature', + 'against my guidelines to create explicit' + ] + content_lower = content_stripped.lower() + + for indicator in mature_indicators: + if indicator in content_lower: + # This is likely a refusal, not truncation + # Don't mark as truncation, let the calling code handle it + print(f"Content appears to be refused (contains '{indicator[:20]}...')") + return False # This is a refusal, not truncation + + # Pattern 5: Check for incomplete code blocks + if '```' in content: + code_block_count = content.count('```') + if code_block_count % 2 != 0: # Odd number means 
unclosed + # Additional check: is there actual code content? + last_block_pos = content.rfind('```') + content_after_block = content[last_block_pos + 3:].strip() + + # Only flag if there's substantial content after the opening ``` + if len(content_after_block) > 10: + print(f"Possible silent truncation: unclosed code block") + return True + + # Pattern 6: For glossary/JSON context, check for incomplete JSON (improved) + if context in ['glossary', 'json', 'data']: + # Try to detect JSON-like content + if content_stripped.startswith(('[', '{')): + # Check for matching brackets + open_brackets = content_stripped.count('[') + content_stripped.count('{') + close_brackets = content_stripped.count(']') + content_stripped.count('}') + + if open_brackets > close_brackets: + # Additional validation: try to parse as JSON + import json + try: + json.loads(content_stripped) + # It's valid JSON, not truncated + return False + except json.JSONDecodeError as e: + # Check if the error is at the end (indicating truncation) + if e.pos >= len(content_stripped) - 10: + print(f"Possible silent truncation: incomplete JSON structure") + return True + + # Pattern 7: Check for sudden endings in long content + if len(content_stripped) > 500: + # Look for patterns that indicate mid-thought truncation + last_100_chars = content_stripped[-100:] + + # Check for incomplete patterns at the end + incomplete_patterns = [ + r',\s*$', # Ends with comma + r';\s*$', # Ends with semicolon + r'\w+ing\s+$', # Ends with -ing word (often mid-action) + r'\b(and|or|but|with|for|to|in|on|at)\s*$', # Ends with conjunction/preposition + r'\b(the|a|an)\s*$', # Ends with article + ] + + import re + for pattern in incomplete_patterns: + if re.search(pattern, last_100_chars, re.IGNORECASE): + # Double-check this isn't a false positive + # Look at the broader context + sentences = content_stripped.split('.') + if len(sentences) > 3: # Has multiple sentences + last_sentence = sentences[-1].strip() + if len(last_sentence) > 20: # Substantial incomplete sentence + print(f"Possible silent truncation: content ends mid-thought") + return True + + return False + + def _enhance_electronhub_response(self, response: UnifiedResponse, messages: List[Dict], + context: str = None) -> UnifiedResponse: + """ + Enhance ElectronHub responses with better truncation detection and handling. + ElectronHub sometimes silently truncates without proper finish_reason. 
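+
+        Illustrative outcome (hypothetical values, per the checks below):
+            resp = UnifiedResponse(content="And then she reached for the", finish_reason="stop")
+            resp = self._enhance_electronhub_response(resp, messages, context="translation")
+            # resp.finish_reason becomes 'length', error_details['type'] is 'silent_truncation',
+            # and "[WARNING: Response may be truncated]" is appended for translation context;
+            # a refusal such as "I cannot assist with..." is marked 'content_filter' instead.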
+ """ + # If already marked as truncated, no need to check further + if response.is_truncated: + return response + + # Check for silent truncation + if self._detect_silent_truncation(response.content, messages, context): + print(f"Silent truncation detected for {self.model} via ElectronHub") + + # Check if it's likely censorship vs length limit + content_lower = response.content.lower() + censorship_phrases = [ + "i cannot", "i can't", "inappropriate", "unable to process", + "against my guidelines", "cannot assist", "not able to", + "i'm not able", "i am not able", "cannot provide", "can't provide" + ] + + is_censorship = any(phrase in content_lower for phrase in censorship_phrases) + + if is_censorship: + # This is content refusal, not truncation + logger.info("Detected content refusal rather than truncation") + response.finish_reason = 'content_filter' + response.error_details = { + 'type': 'content_refused', + 'provider': 'electronhub', + 'model': self.model, + 'detection': 'silent_censorship' + } + else: + # This is actual truncation + response.finish_reason = 'length' # Mark as truncated for retry logic + response.error_details = { + 'type': 'silent_truncation', + 'provider': 'electronhub', + 'model': self.model, + 'detection': 'pattern_analysis' + } + + # Add warning to content for translation context + if context == 'translation' and not is_censorship: + response.content += "\n[WARNING: Response may be truncated]" + + return response + + def _send_electronhub(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to ElectronHub API aggregator with enhanced truncation detection + + ElectronHub provides access to multiple AI models through a unified endpoint. + Model names should be prefixed with 'eh/', 'electronhub/', or 'electron/'. + + Examples: + - eh/yi-34b-chat-200k + - electronhub/gpt-4.5 + - electron/claude-4-opus + + Note: ElectronHub uses OpenAI-compatible API format. + This version includes silent truncation detection for mature content. 
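+
+        Illustrative prefix handling (hypothetical model names; the stripping happens below):
+            'eh/gpt-4o'                 -> sent to ElectronHub as 'gpt-4o'
+            'electronhub/claude-3-opus' -> 'claude-3-opus'
+            'electron/yi-34b-chat'      -> 'yi-34b-chat'
+        The endpoint defaults to https://api.electronhub.ai/v1 and can be overridden
+        with the ELECTRONHUB_API_URL environment variable.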
+ """ + # Get ElectronHub endpoint (can be overridden via environment) + base_url = os.getenv("ELECTRONHUB_API_URL", "https://api.electronhub.ai/v1") + + # Store original model name for error messages and restoration + original_model = self.model + + # Strip the ElectronHub prefix from the model name + # This is critical - ElectronHub expects the model name WITHOUT the prefix + actual_model = self.model + + # Define prefixes to strip (in order of likelihood) + electronhub_prefixes = ['eh/', 'electronhub/', 'electron/'] + + # Strip the first matching prefix + for prefix in electronhub_prefixes: + if actual_model.startswith(prefix): + actual_model = actual_model[len(prefix):] + logger.info(f"Stripped '{prefix}' prefix from model name: '{original_model}' -> '{actual_model}'") + print(f"🔌 ElectronHub: Using model '{actual_model}' (stripped from '{original_model}')") + break + else: + # No prefix found - this shouldn't happen if routing worked correctly + print(f"No ElectronHub prefix found in model '{self.model}', using as-is") + print(f"⚠️ ElectronHub: No prefix found in '{self.model}', using as-is") + + # Log the API call details + logger.info(f"Sending to ElectronHub API: model='{actual_model}', endpoint='{base_url}'") + + # Debug: Log system prompt if present + for msg in messages: + if msg.get('role') == 'system': + logger.debug(f"ElectronHub - System prompt detected: {len(msg.get('content', ''))} chars") + print(f"📝 ElectronHub: Sending system prompt ({len(msg.get('content', ''))} characters)") + break + else: + print("ElectronHub - No system prompt found in messages") + print("⚠️ ElectronHub: No system prompt in messages") + + # Check if we should warn about potentially problematic models + #problematic_models = ['claude', 'gpt-4', 'gpt-3.5', 'gemini'] + #if any(model in actual_model.lower() for model in problematic_models): + #print(f"⚠️ ElectronHub: Model '{actual_model}' may have strict content filters") + + # Check for mature content indicators + all_content = ' '.join(msg.get('content', '') for msg in messages).lower() + mature_indicators = ['mature', 'adult', 'explicit', 'sexual', 'violence', 'intimate'] + #if any(indicator in all_content for indicator in mature_indicators): + #print(f"💡 ElectronHub: Consider using models like yi-34b-chat, deepseek-chat, or llama-2-70b for this content") + + # Temporarily update self.model for the API call + # This is necessary because _send_openai_compatible uses self.model + self.model = actual_model + + try: + # Make the API call using OpenAI-compatible format + result = self._send_openai_compatible( + messages, temperature, max_tokens, + base_url=base_url, + response_name=response_name, + provider="electronhub" + ) + + # ENHANCEMENT: Check for silent truncation/censorship + enhanced_result = self._enhance_electronhub_response(result, messages, self.context) + + if enhanced_result.finish_reason in ['length', 'content_filter']: + self._log_truncation_failure( + messages=messages, + response_content=enhanced_result.content, + finish_reason=enhanced_result.finish_reason, + context=self.context, + error_details=enhanced_result.error_details + ) + + # Log if truncation was detected + if enhanced_result.finish_reason == 'length' and result.finish_reason != 'length': + print(f"🔍 ElectronHub: Silent truncation detected and corrected") + elif enhanced_result.finish_reason == 'content_filter' and result.finish_reason != 'content_filter': + print(f"🚫 ElectronHub: Silent content refusal detected") + + return enhanced_result + + except UnifiedClientError as 
e: + # Enhance error messages for common ElectronHub issues + error_str = str(e) + + if "Invalid model" in error_str or "400" in error_str or "model not found" in error_str.lower(): + # Provide helpful error message for invalid models + error_msg = ( + f"ElectronHub rejected model '{actual_model}' (original: '{original_model}').\n" + f"\nCommon ElectronHub model names:\n" + f" • OpenAI: gpt-4, gpt-4-turbo, gpt-3.5-turbo, gpt-4o, gpt-4o-mini, gpt-4.5, gpt-4.1\n" + f" • Anthropic: claude-3-opus, claude-3-sonnet, claude-3-haiku, claude-4-opus, claude-4-sonnet\n" + f" • Meta: llama-2-70b-chat, llama-2-13b-chat, llama-2-7b-chat, llama-3-70b, llama-4-70b\n" + f" • Mistral: mistral-large, mistral-medium, mixtral-8x7b\n" + f" • Google: gemini-pro, gemini-1.5-pro, gemini-2.5-pro\n" + f" • Yi: yi-34b-chat, yi-6b-chat\n" + f" • Others: deepseek-coder-33b, qwen-72b-chat, grok-3\n" + f"\nNote: Do not include version suffixes like ':latest' or ':safe'" + ) + print(f"\n❌ {error_msg}") + raise UnifiedClientError(error_msg, error_type="invalid_model", details={"attempted_model": actual_model}) + + elif "unauthorized" in error_str.lower() or "401" in error_str: + error_msg = ( + f"ElectronHub authentication failed. Please check your API key.\n" + f"Make sure you're using an ElectronHub API key, not a key from the underlying provider." + ) + print(f"\n❌ {error_msg}") + raise UnifiedClientError(error_msg, error_type="auth_error") + + elif "rate limit" in error_str.lower() or "429" in error_str: + # Preserve the original error details from OpenRouter/ElectronHub + # The original error should contain the full API response with specific details + print(f"\n⏳ ElectronHub rate limit error: {error_str}") + # Use the original error string to preserve the full OpenRouter error description + raise UnifiedClientError(error_str, error_type="rate_limit") + + else: + # Re-raise original error with context + print(f"ElectronHub API error for model '{actual_model}': {e}") + raise + + finally: + # Always restore the original model name + # This ensures subsequent calls work correctly + self.model = original_model + + def _parse_poe_tokens(self, key_str: str) -> dict: + """Parse POE cookies from a single string. + Returns a dict that always includes 'p-b' (required) and may include 'p-lat' and any + other cookies present (e.g., 'cf_clearance', '__cf_bm'). Unknown cookies are forwarded as-is. 
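+
+        Illustrative parse (hypothetical cookie values):
+            _parse_poe_tokens("p-b:AAA|p-lat:BBB|cf_clearance:CCC")
+            -> {'p-b': 'AAA', 'p-lat': 'BBB', 'cf_clearance': 'CCC'}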
+ + Accepted input formats: + - "p-b:AAA|p-lat:BBB" + - "p-b=AAA; p-lat=BBB" + - Raw cookie header with or without the "Cookie:" prefix + - Just the p-b value (long string) when no delimiter is present + """ + import re + s = (key_str or "").strip() + if s.lower().startswith("cookie:"): + s = s.split(":", 1)[1].strip() + tokens: dict = {} + # Split on | ; , or newlines + parts = re.split(r"[|;,\n]+", s) + for part in parts: + part = part.strip() + if not part: + continue + if ":" in part: + k, v = part.split(":", 1) + elif "=" in part: + k, v = part.split("=", 1) + else: + # If no delimiter and p-b not set, treat entire string as p-b + if 'p-b' not in tokens and len(part) > 20: + tokens['p-b'] = part + continue + k = k.strip().lower() + v = v.strip() + # Normalize key names + if k in ("p-b", "p_b", "pb", "p.b"): + tokens['p-b'] = v + elif k in ("p-lat", "p_lat", "plat", "p.lat"): + tokens['p-lat'] = v + else: + # Forward any additional cookie that looks valid + if re.match(r"^[a-z0-9_\-\.]+$", k): + tokens[k] = v + return tokens + + def _send_poe(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request using poe-api-wrapper""" + try: + from poe_api_wrapper import PoeApi + except ImportError: + raise UnifiedClientError( + "poe-api-wrapper not installed. Run: pip install poe-api-wrapper" + ) + + # Parse cookies using robust parser + tokens = self._parse_poe_tokens(self.api_key) + if 'p-b' not in tokens or not tokens['p-b']: + raise UnifiedClientError( + "POE tokens missing. Provide cookies as 'p-b:VALUE|p-lat:VALUE' or 'p-b=VALUE; p-lat=VALUE'", + error_type="auth_error" + ) + + # Some wrapper versions require p-lat present (empty is allowed but may reduce success rate) + if 'p-lat' not in tokens: + logger.info("No p-lat cookie provided; proceeding without it") + tokens['p-lat'] = '' + + logger.info(f"Tokens being sent: p-b={len(tokens.get('p-b', ''))} chars, p-lat={len(tokens.get('p-lat', ''))} chars") + + try: + # Create Poe client (try to pass proxy/headers if supported) + poe_kwargs = {} + ua = os.getenv("POE_USER_AGENT") or os.getenv("HTTP_USER_AGENT") + if ua: + poe_kwargs["headers"] = {"User-Agent": ua, "Referer": "https://poe.com/", "Origin": "https://poe.com"} + proxy = os.getenv("POE_PROXY") or os.getenv("HTTPS_PROXY") or os.getenv("HTTP_PROXY") + if proxy: + poe_kwargs["proxy"] = proxy + try: + poe_client = PoeApi(tokens=tokens, **poe_kwargs) + except TypeError: + # Older versions may not support headers/proxy kwargs + poe_client = PoeApi(tokens=tokens) + # Best-effort header update if client exposes httpx session + try: + if ua and hasattr(poe_client, "session") and hasattr(poe_client.session, "headers"): + poe_client.session.headers.update({"User-Agent": ua, "Referer": "https://poe.com/", "Origin": "https://poe.com"}) + except Exception: + pass + + # Get bot name + requested_model = self.model.replace('poe/', '', 1) + bot_map = { + # GPT models + 'gpt-4': 'beaver', + 'gpt-4o': 'GPT-4o', + 'gpt-3.5-turbo': 'chinchilla', + + # Claude models + 'claude': 'a2', + 'claude-instant': 'a2', + 'claude-2': 'claude_2', + 'claude-3-opus': 'claude_3_opus', + 'claude-3-sonnet': 'claude_3_sonnet', + 'claude-3-haiku': 'claude_3_haiku', + + # Gemini models + 'gemini-2.5-flash': 'gemini_1_5_flash', + 'gemini-2.5-pro': 'gemini_1_5_pro', + 'gemini-pro': 'gemini_pro', + + # Other models + 'assistant': 'assistant', + 'web-search': 'web_search', + } + bot_name = bot_map.get(requested_model.lower(), requested_model) + logger.info(f"Using bot name: {bot_name}") + + # 
Send message + prompt = self._messages_to_prompt(messages) + full_response = "" + + # Handle temperature and max_tokens if supported + # Note: poe-api-wrapper might not support these parameters directly + for chunk in poe_client.send_message(bot_name, prompt): + if 'response' in chunk: + full_response = chunk['response'] + + # Get the final text + final_text = chunk.get('text', full_response) if 'chunk' in locals() else full_response + + if not final_text: + raise UnifiedClientError("POE returned empty response", error_type="empty") + + return UnifiedResponse( + content=final_text, + finish_reason="stop", + raw_response=chunk if 'chunk' in locals() else {"response": full_response} + ) + + except Exception as e: + print(f"Poe API error details: {str(e)}") + # Check for specific errors + error_str = str(e).lower() + if "403" in error_str or "forbidden" in error_str or "auth" in error_str or "unauthorized" in error_str: + raise UnifiedClientError( + "POE authentication failed (403). Your cookies may be invalid or expired. " + "Copy fresh cookies (p-b and p-lat) from an active poe.com session.", + error_type="auth_error" + ) + if "rate limit" in error_str or "429" in error_str: + raise UnifiedClientError( + "POE rate limit exceeded. Please wait before trying again.", + error_type="rate_limit" + ) + raise UnifiedClientError(f"Poe API error: {e}") + + def _save_openrouter_config(self, config_data: dict, response_name: str = None): + """Save OpenRouter configuration next to the current request payloads (thread-specific directory)""" + if not os.getenv("SAVE_PAYLOAD", "1") == "1": + return + + # Handle None or empty response_name + if not response_name: + response_name = f"config_{datetime.now().strftime('%H%M%S')}" + + # Sanitize response_name + import re + response_name = re.sub(r'[<>:"/\\|?*]', '_', str(response_name)) + + # Reuse the same payload directory as other saves + thread_dir = self._get_thread_directory() + os.makedirs(thread_dir, exist_ok=True) + + # Create filename + timestamp = datetime.now().strftime("%H%M%S") + config_filename = f"openrouter_config_{timestamp}_{response_name}.json" + config_path = os.path.join(thread_dir, config_filename) + + try: + with self._file_write_lock: + with open(config_path, 'w', encoding='utf-8') as f: + json.dump(config_data, f, indent=2, ensure_ascii=False) + #print(f"Saved OpenRouter config to: {config_path}") + except Exception as e: + print(f"Failed to save OpenRouter config: {e}") + + + def _send_fireworks(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to OpenAI API with o-series model support""" + # Check if this is actually Azure + if os.getenv('IS_AZURE_ENDPOINT') == '1': + # Route to Azure-compatible handler + base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + return self._send_openai_compatible( + messages, temperature, max_tokens, + base_url=base_url, + response_name=response_name, + provider="azure" + ) + + max_retries = self._get_max_retries() + api_delay = float(os.getenv("SEND_INTERVAL_SECONDS", "2")) + + # Track what fixes we've already tried + fixes_attempted = { + 'temperature': False, + 'system_message': False, + 'max_tokens_param': False + } + + for attempt in range(max_retries): + try: + params = self._build_openai_params(messages, temperature, max_tokens, max_completion_tokens) + + # Get user-configured anti-duplicate parameters + anti_dupe_params = self._get_anti_duplicate_params(temperature) + params.update(anti_dupe_params) + + # Apply any fixes from previous attempts + if 
fixes_attempted['temperature'] and 'temperature_override' in fixes_attempted: + params['temperature'] = fixes_attempted['temperature_override'] + + if fixes_attempted['system_message']: + # Convert system messages to user messages + new_messages = [] + for msg in params.get('messages', []): + if msg['role'] == 'system': + new_messages.append({ + 'role': 'user', + 'content': f"Instructions: {msg['content']}" + }) + else: + new_messages.append(msg) + params['messages'] = new_messages + + if fixes_attempted['max_tokens_param']: + if 'max_tokens' in params: + params['max_completion_tokens'] = params.pop('max_tokens') + + # Check for cancellation + if self._cancelled: + raise UnifiedClientError("Operation cancelled") + + # Log the request for debugging + logger.debug(f"OpenAI request - Model: {self.model}, Params: {list(params.keys())}") + + + # Make the API call + resp = self.openai_client.chat.completions.create( + **params, + timeout=self.request_timeout, + idempotency_key=self._get_idempotency_key() + ) + + # Enhanced response validation with detailed logging + if not resp: + print("OpenAI returned None response") + raise UnifiedClientError("OpenAI returned empty response object") + + if not hasattr(resp, 'choices'): + print(f"OpenAI response missing 'choices'. Response type: {type(resp)}") + print(f"Response attributes: {dir(resp)[:10]}") # Log first 10 attributes + raise UnifiedClientError("Invalid OpenAI response structure - missing choices") + + if not resp.choices: + print("OpenAI response has empty choices array") + # Check if this is a content filter issue + if hasattr(resp, 'model') and hasattr(resp, 'id'): + print(f"Response ID: {resp.id}, Model: {resp.model}") + raise UnifiedClientError("OpenAI returned empty choices array") + + choice = resp.choices[0] + + # Enhanced choice validation + if not hasattr(choice, 'message'): + print(f"OpenAI choice missing 'message'. 
Choice type: {type(choice)}") + print(f"Choice attributes: {dir(choice)[:10]}") + raise UnifiedClientError("OpenAI choice missing message") + + # Check if this is actually Gemini using OpenAI endpoint + is_gemini_via_openai = False + if hasattr(self, '_original_client_type') and self._original_client_type == 'gemini': + is_gemini_via_openai = True + logger.info("This is Gemini using OpenAI-compatible endpoint") + elif self.model.lower().startswith('gemini'): + is_gemini_via_openai = True + logger.info("Detected Gemini model via OpenAI endpoint") + + if not choice.message: + # Gemini via OpenAI sometimes returns None message + if is_gemini_via_openai: + print("Gemini via OpenAI returned None message - creating empty message") + # Create a mock message object + class MockMessage: + content = "" + refusal = None + choice.message = MockMessage() + else: + print("OpenAI choice.message is None") + raise UnifiedClientError("OpenAI message is empty") + + # Check for content with detailed debugging + content = None + + # Try different ways to get content + if hasattr(choice.message, 'content'): + content = choice.message.content + elif hasattr(choice.message, 'text'): + content = choice.message.text + elif isinstance(choice.message, dict): + content = choice.message.get('content') or choice.message.get('text') + + # Log what we found + if content is None: + # For Gemini via OpenAI, None content is common and not an error + if is_gemini_via_openai: + print("Gemini via OpenAI returned None content - likely a safety filter") + content = "" # Set to empty string instead of raising error + finish_reason = 'content_filter' + else: + print(f"OpenAI message has no content. Message type: {type(choice.message)}") + print(f"Message attributes: {dir(choice.message)[:20]}") + print(f"Message representation: {str(choice.message)[:200]}") + + # Check if this is a refusal (only if not already handled by Gemini) + if content is None and hasattr(choice.message, 'refusal') and choice.message.refusal: + print(f"OpenAI refused: {choice.message.refusal}") + # Return the refusal as content + content = f"[REFUSED BY OPENAI]: {choice.message.refusal}" + finish_reason = 'content_filter' + elif hasattr(choice, 'finish_reason'): + finish_reason = choice.finish_reason + print(f"Finish reason: {finish_reason}") + + # Check for specific finish reasons + if finish_reason == 'content_filter': + content = "[CONTENT BLOCKED BY OPENAI SAFETY FILTER]" + elif finish_reason == 'length': + content = "" # Empty but will be marked as truncated + else: + # Try to extract any available info + content = f"[EMPTY RESPONSE - Finish reason: {finish_reason}]" + else: + content = "[EMPTY RESPONSE FROM OPENAI]" + + # Handle empty string content + elif content == "": + print("OpenAI returned empty string content") + finish_reason = getattr(choice, 'finish_reason', 'unknown') + + if finish_reason == 'length': + logger.info("Empty content due to length limit") + # This is a truncation at the start - token limit too low + return UnifiedResponse( + content="", + finish_reason='length', + error_details={ + 'error': 'Response truncated - increase max_completion_tokens', + 'finish_reason': 'length', + 'token_limit': params.get('max_completion_tokens') or params.get('max_tokens') + } + ) + elif finish_reason == 'content_filter': + content = "[CONTENT BLOCKED BY OPENAI]" + else: + print(f"Empty content with finish_reason: {finish_reason}") + content = f"[EMPTY - Reason: {finish_reason}]" + + # Get finish reason (with fallback) + finish_reason = 
getattr(choice, 'finish_reason', 'stop') + + # Normalize OpenAI finish reasons + if finish_reason == "max_tokens": + finish_reason = "length" + + # Special handling for Gemini empty responses + if is_gemini_via_openai and content == "" and finish_reason == 'stop': + # Empty content with 'stop' from Gemini usually means safety filter + print("Empty Gemini response with finish_reason='stop' - likely safety filter") + content = "[BLOCKED BY GEMINI SAFETY FILTER]" + finish_reason = 'content_filter' + + # Extract usage + usage = None + if hasattr(resp, 'usage') and resp.usage: + usage = { + 'prompt_tokens': resp.usage.prompt_tokens, + 'completion_tokens': resp.usage.completion_tokens, + 'total_tokens': resp.usage.total_tokens + } + logger.debug(f"Token usage: {usage}") + + # Log successful response + logger.info(f"OpenAI response - Content length: {len(content) if content else 0}, Finish reason: {finish_reason}") + + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=resp + ) + + except OpenAIError as e: + error_str = str(e) + error_dict = None + + # Try to extract error details + try: + if hasattr(e, 'response') and hasattr(e.response, 'json'): + error_dict = e.response.json() + print(f"OpenAI error details: {error_dict}") + except: + pass + + # Check if we can fix the error and retry + should_retry = False + + # Handle temperature constraints reactively + if not fixes_attempted['temperature'] and "temperature" in error_str and ("does not support" in error_str or "unsupported_value" in error_str): + # Extract what temperature the model wants + default_temp = 1 # Default fallback + if "Only the default (1)" in error_str: + default_temp = 1 + elif error_dict and 'error' in error_dict: + # Try to parse the required temperature from error message + import re + temp_match = re.search(r'default \((\d+(?:\.\d+)?)\)', error_dict['error'].get('message', '')) + if temp_match: + default_temp = float(temp_match.group(1)) + + # Send message to GUI + print(f"🔄 Model {self.model} requires temperature={default_temp}, retrying...") + + print(f"Model {self.model} requires temperature={default_temp}, will retry...") + fixes_attempted['temperature'] = True + fixes_attempted['temperature_override'] = default_temp + should_retry = True + + # Handle system message constraints reactively + elif not fixes_attempted['system_message'] and "system" in error_str.lower() and ("not supported" in error_str or "unsupported" in error_str): + print(f"Model {self.model} doesn't support system messages, will convert and retry...") + fixes_attempted['system_message'] = True + should_retry = True + + # Handle max_tokens vs max_completion_tokens reactively + elif not fixes_attempted['max_tokens_param'] and "max_tokens" in error_str and ("not supported" in error_str or "max_completion_tokens" in error_str): + print(f"Switching from max_tokens to max_completion_tokens for model {self.model}") + fixes_attempted['max_tokens_param'] = True + should_retry = True + time.sleep(api_delay) + continue + + # Handle rate limits + elif "rate limit" in error_str.lower() or "429" in error_str: + # In multi-key mode, don't retry here - let outer handler rotate keys + if self._multi_key_mode: + print(f"OpenAI rate limit hit in multi-key mode - passing to key rotation") + raise UnifiedClientError(f"OpenAI rate limit: {e}", error_type="rate_limit") + elif attempt < max_retries - 1: + # Single key mode - wait and retry + wait_time = api_delay * 10 + print(f"Rate limit hit, waiting {wait_time}s before 
retry") + time.sleep(wait_time) + continue + + # If we identified a fix, retry immediately + if should_retry and attempt < max_retries - 1: + time.sleep(api_delay) + continue + + # Other errors or no retries left + if attempt < max_retries - 1: + print(f"OpenAI error (attempt {attempt + 1}/{max_retries}): {e}") + time.sleep(api_delay) + continue + + print(f"OpenAI error after all retries: {e}") + raise UnifiedClientError(f"OpenAI error: {e}", error_type="api_error") + + except Exception as e: + if attempt < max_retries - 1: + print(f"Unexpected error (attempt {attempt + 1}/{max_retries}): {e}") + time.sleep(api_delay) + continue + raise UnifiedClientError(f"OpenAI error: {e}", error_type="unknown") + + raise UnifiedClientError("OpenAI API failed after all retries") + + def _build_openai_params(self, messages, temperature, max_tokens, max_completion_tokens=None): + """Build parameters for OpenAI API call""" + params = { + "model": self.model, + "messages": messages, + "temperature": temperature + } + + # Determine which token parameter to use based on model + if self._is_o_series_model(): + # o-series models use max_completion_tokens + # The manga translator passes the actual value as max_tokens for now + if max_completion_tokens is not None: + params["max_completion_tokens"] = max_completion_tokens + elif max_tokens is not None: + params["max_completion_tokens"] = max_tokens + logger.debug(f"Using max_completion_tokens={max_tokens} for o-series model {self.model}") + else: + # Regular models use max_tokens + if max_tokens is not None: + params["max_tokens"] = max_tokens + + return params + + def _supports_thinking(self) -> bool: + """Check if the current Gemini model supports thinking parameter""" + if not self.model: + return False + + model_lower = self.model.lower() + + # According to Google documentation, thinking is supported on: + # 1. All Gemini 2.5 series models (Pro, Flash, Flash-Lite) + # 2. 
Gemini 2.0 Flash Thinking Experimental model + + # Check for Gemini 2.5 series + if 'gemini-2.5' in model_lower: + return True + + # Check for Gemini 2.0 Flash Thinking model variants + thinking_models = [ + 'gemini-2.0-flash-thinking-exp', + 'gemini-2.0-flash-thinking-experimental', + 'gemini-2.0-flash-thinking-exp-1219', + 'gemini-2.0-flash-thinking-exp-01-21', + ] + + for thinking_model in thinking_models: + if thinking_model in model_lower: + return True + + return False + + def _get_thread_directory(self): + """Get thread-specific directory for payload storage""" + thread_name = threading.current_thread().name + # Prefer the client's explicit context if available + explicit = getattr(self, 'context', None) + if explicit in ('translation', 'glossary', 'summary'): + context = explicit + else: + if 'Translation' in thread_name: + context = 'translation' + elif 'Glossary' in thread_name: + context = 'glossary' + elif 'Summary' in thread_name: + context = 'summary' + else: + context = 'general' + + thread_dir = os.path.join("Payloads", context, f"{thread_name}_{threading.current_thread().ident}") + os.makedirs(thread_dir, exist_ok=True) + return thread_dir + + def _save_gemini_safety_config(self, config_data: dict, response_name: str = None): + """Save Gemini safety configuration next to the current request payloads""" + if not os.getenv("SAVE_PAYLOAD", "1") == "1": + return + + # Handle None or empty response_name + if not response_name: + response_name = f"safety_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + # Sanitize response_name to ensure it's filesystem-safe + # Remove or replace invalid characters + import re + response_name = re.sub(r'[<>:\"/\\|?*]', '_', str(response_name)) + + # Reuse the same payload directory as other saves + thread_dir = self._get_thread_directory() + os.makedirs(thread_dir, exist_ok=True) + + # Create unique filename with timestamp + timestamp = datetime.now().strftime("%H%M%S") + + # Ensure response_name doesn't already contain timestamp to avoid duplication + if timestamp not in response_name: + config_filename = f"gemini_safety_{timestamp}_{response_name}.json" + else: + config_filename = f"gemini_safety_{response_name}.json" + + config_path = os.path.join(thread_dir, config_filename) + + try: + with self._file_write_lock: + with open(config_path, 'w', encoding='utf-8') as f: + json.dump(config_data, f, indent=2, ensure_ascii=False) + # Only log if not stopping + if not self._is_stop_requested(): + print(f"Saved Gemini safety status to: {config_path}") + except Exception as e: + # Only log errors if not stopping + if not self._is_stop_requested(): + print(f"Failed to save Gemini safety config: {e}") + + def _send_gemini(self, messages, temperature, max_tokens, response_name, image_base64=None) -> UnifiedResponse: + """Send request to Gemini API with support for both text and multi-image messages""" + response = None + + # Check if we should use OpenAI-compatible endpoint + use_openai_endpoint = os.getenv("USE_GEMINI_OPENAI_ENDPOINT", "0") == "1" + gemini_endpoint = os.getenv("GEMINI_OPENAI_ENDPOINT", "") + + # Import types at the top + from google.genai import types + + # Check if this contains images + has_images = image_base64 is not None # Direct image parameter + if not has_images: + for msg in messages: + if isinstance(msg.get('content'), list): + for part in msg['content']: + if part.get('type') == 'image_url': + has_images = True + break + if has_images: + break + + # Check if safety settings are disabled + disable_safety = 
os.getenv("DISABLE_GEMINI_SAFETY", "false").lower() == "true" + + # Get thinking budget from environment + thinking_budget = int(os.getenv("THINKING_BUDGET", "-1")) + + # Check if this model supports thinking + supports_thinking = self._supports_thinking() + + # Configure safety settings + safety_settings = None + if disable_safety: + # Set all safety categories to BLOCK_NONE (most permissive) + safety_settings = [ + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, + threshold=types.HarmBlockThreshold.BLOCK_NONE + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, + threshold=types.HarmBlockThreshold.BLOCK_NONE + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_HARASSMENT, + threshold=types.HarmBlockThreshold.BLOCK_NONE + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold=types.HarmBlockThreshold.BLOCK_NONE + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY, + threshold=types.HarmBlockThreshold.BLOCK_NONE + ), + ] + if not self._is_stop_requested(): + logger.info("Gemini safety settings disabled - using BLOCK_NONE for all categories") + else: + if not self._is_stop_requested(): + logger.info("Using default Gemini safety settings") + + # Define retry attempts + attempts = self._get_max_retries() + attempt = 0 + error_details = {} + + # Prepare configuration data + readable_safety = "DEFAULT" + safety_status = "ENABLED - Using default Gemini safety settings" + if disable_safety: + safety_status = "DISABLED - All categories set to BLOCK_NONE" + readable_safety = { + "HATE_SPEECH": "BLOCK_NONE", + "SEXUALLY_EXPLICIT": "BLOCK_NONE", + "HARASSMENT": "BLOCK_NONE", + "DANGEROUS_CONTENT": "BLOCK_NONE", + "CIVIC_INTEGRITY": "BLOCK_NONE" + } + + # Log to console with thinking status - only if not stopping + if not self._is_stop_requested(): + endpoint_info = f" (via OpenAI endpoint: {gemini_endpoint})" if use_openai_endpoint else " (native API)" + print(f"🔒 Gemini Safety Status: {safety_status}{endpoint_info}") + + thinking_status = "" + if supports_thinking: + if thinking_budget == 0: + thinking_status = " (thinking disabled)" + elif thinking_budget == -1: + thinking_status = " (dynamic thinking)" + elif thinking_budget > 0: + thinking_status = f" (thinking budget: {thinking_budget})" + else: + thinking_status = " (thinking not supported)" + + print(f"🧠 Thinking Status: {thinking_status}") + + # Save configuration to file + request_type = "IMAGE_REQUEST" if has_images else "TEXT_REQUEST" + if use_openai_endpoint: + request_type = "GEMINI_OPENAI_ENDPOINT_" + request_type + config_data = { + "type": request_type, + "model": self.model, + "endpoint": gemini_endpoint if use_openai_endpoint else "native", + "safety_enabled": not disable_safety, + "safety_settings": readable_safety, + "temperature": temperature, + "max_output_tokens": max_tokens, + "thinking_supported": supports_thinking, + "thinking_budget": thinking_budget if supports_thinking else None, + "timestamp": datetime.now().isoformat(), + } + + # Save configuration to file with thread isolation + self._save_gemini_safety_config(config_data, response_name) + + # Main attempt loop - SAME FOR BOTH ENDPOINTS + while attempt < attempts: + try: + if self._cancelled: + raise UnifiedClientError("Operation cancelled") + + # Get user-configured anti-duplicate parameters + anti_dupe_params = self._get_anti_duplicate_params(temperature) + + # Build generation config with 
anti-duplicate parameters + generation_config_params = { + "temperature": temperature, + "max_output_tokens": max_tokens, + **anti_dupe_params # Add user's custom parameters + } + + # Log the request - only if not stopping + if not self._is_stop_requested(): + print(f" 📊 Temperature: {temperature}, Max tokens: {max_tokens}") + + # ========== MAKE THE API CALL - DIFFERENT FOR EACH ENDPOINT ========== + if use_openai_endpoint and gemini_endpoint: + # Ensure the endpoint ends with /openai/ for compatibility + if not gemini_endpoint.endswith('/openai/'): + if gemini_endpoint.endswith('/'): + gemini_endpoint = gemini_endpoint + 'openai/' + else: + gemini_endpoint = gemini_endpoint + '/openai/' + + # Call OpenAI-compatible endpoint + response = self._send_openai_compatible( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + base_url=gemini_endpoint, + response_name=response_name, + provider="gemini-openai" + ) + + # For OpenAI endpoint, we already have a UnifiedResponse + # Extract any thinking tokens if available + thinking_tokens_displayed = False + + if hasattr(response, 'raw_response'): + raw_resp = response.raw_response + + # Check multiple possible locations for thinking tokens + thinking_tokens = 0 + + # Check in usage object + if hasattr(raw_resp, 'usage'): + usage = raw_resp.usage + + # Try various field names that might contain thinking tokens + if hasattr(usage, 'thoughts_token_count'): + thinking_tokens = usage.thoughts_token_count or 0 + elif hasattr(usage, 'thinking_tokens'): + thinking_tokens = usage.thinking_tokens or 0 + elif hasattr(usage, 'reasoning_tokens'): + thinking_tokens = usage.reasoning_tokens or 0 + + # Also check if there's a breakdown in the usage + if hasattr(usage, 'completion_tokens_details'): + details = usage.completion_tokens_details + if hasattr(details, 'reasoning_tokens'): + thinking_tokens = details.reasoning_tokens or 0 + + # Check in the raw response itself + if thinking_tokens == 0 and hasattr(raw_resp, '__dict__'): + # Look for thinking-related fields in the response + for field_name in ['thoughts_token_count', 'thinking_tokens', 'reasoning_tokens']: + if field_name in raw_resp.__dict__: + thinking_tokens = raw_resp.__dict__[field_name] or 0 + if thinking_tokens > 0: + break + + # Display thinking tokens if found or if thinking was requested - only if not stopping + if supports_thinking and not self._is_stop_requested(): + if thinking_tokens > 0: + print(f" 💭 Thinking tokens used: {thinking_tokens}") + thinking_tokens_displayed = True + elif thinking_budget == 0: + print(f" ✅ Thinking successfully disabled (0 thinking tokens)") + thinking_tokens_displayed = True + elif thinking_budget == -1: + # Dynamic thinking - might not be reported + print(f" 💭 Thinking: Dynamic mode (tokens may not be reported via OpenAI endpoint)") + thinking_tokens_displayed = True + elif thinking_budget > 0: + # Specific budget requested but not reported + print(f" ⚠️ Thinking budget set to {thinking_budget} but tokens not reported via OpenAI endpoint") + thinking_tokens_displayed = True + + # If we haven't displayed thinking status yet and it's supported, show a message + if not thinking_tokens_displayed and supports_thinking: + logger.debug("Thinking tokens may have been used but are not reported via OpenAI endpoint") + + # Check finish reason for prohibited content + if response.finish_reason == 'content_filter' or response.finish_reason == 'prohibited_content': + raise UnifiedClientError( + "Content blocked by Gemini OpenAI endpoint", + 
error_type="prohibited_content", + details={"endpoint": "openai", "finish_reason": response.finish_reason} + ) + + return response + + else: + # Native Gemini API call + # Prepare content based on whether we have images + if has_images: + # Handle image content + contents = self._prepare_gemini_image_content(messages, image_base64) + else: + # text-only: use formatted prompt + formatted_prompt = self._format_prompt(messages, style='gemini') + contents = formatted_prompt + # Only add thinking_config if the model supports it + if supports_thinking: + # Create thinking config separately + thinking_config = types.ThinkingConfig( + thinking_budget=thinking_budget + ) + + # Create generation config with thinking_config as a parameter + generation_config = types.GenerateContentConfig( + thinking_config=thinking_config, + **generation_config_params + ) + else: + # Create generation config without thinking_config + generation_config = types.GenerateContentConfig( + **generation_config_params + ) + + # Add safety settings to config if they exist + if safety_settings: + generation_config.safety_settings = safety_settings + + # Make the native API call + # Make the native API call with proper error handling + try: + # Check if gemini_client exists and is not None + if not hasattr(self, 'gemini_client') or self.gemini_client is None: + print("⚠️ Gemini client is None. This typically happens when stop was requested.") + raise UnifiedClientError("Gemini client not initialized - operation may have been cancelled", error_type="cancelled") + + response = self.gemini_client.models.generate_content( + model=self.model, + contents=contents, + config=generation_config + ) + except AttributeError as e: + if "'NoneType' object has no attribute 'models'" in str(e): + print("⚠️ Gemini client is None or invalid. 
This typically happens when stop was requested.") + raise UnifiedClientError("Gemini client not initialized - operation may have been cancelled", error_type="cancelled") + else: + raise + + # Check for blocked content in prompt_feedback + if hasattr(response, 'prompt_feedback'): + feedback = response.prompt_feedback + if hasattr(feedback, 'block_reason') and feedback.block_reason: + error_details['block_reason'] = str(feedback.block_reason) + if disable_safety: + print(f"Content blocked despite safety disabled: {feedback.block_reason}") + else: + print(f"Content blocked: {feedback.block_reason}") + + # Raise as UnifiedClientError with prohibited_content type + raise UnifiedClientError( + f"Content blocked: {feedback.block_reason}", + error_type="prohibited_content", + details={"block_reason": str(feedback.block_reason)} + ) + + # Check if response has candidates with prohibited content finish reason + prohibited_detected = False + finish_reason = 'stop' # Default + + if hasattr(response, 'candidates') and response.candidates: + for candidate in response.candidates: + if hasattr(candidate, 'finish_reason'): + finish_reason_str = str(candidate.finish_reason) + if 'PROHIBITED_CONTENT' in finish_reason_str: + prohibited_detected = True + finish_reason = 'prohibited_content' + print(f" 🚫 Candidate has prohibited content finish reason: {finish_reason_str}") + break + elif 'MAX_TOKENS' in finish_reason_str: + finish_reason = 'length' + elif 'SAFETY' in finish_reason_str: + finish_reason = 'safety' + + # If prohibited content detected, raise error for retry logic + if prohibited_detected: + # Get thinking tokens if available for debugging + thinking_tokens_wasted = 0 + if hasattr(response, 'usage_metadata') and hasattr(response.usage_metadata, 'thoughts_token_count'): + thinking_tokens_wasted = response.usage_metadata.thoughts_token_count or 0 + if thinking_tokens_wasted > 0: + print(f" ⚠️ Wasted {thinking_tokens_wasted} thinking tokens on prohibited content") + + raise UnifiedClientError( + "Content blocked: FinishReason.PROHIBITED_CONTENT", + error_type="prohibited_content", + details={ + "finish_reason": "PROHIBITED_CONTENT", + "thinking_tokens_wasted": thinking_tokens_wasted + } + ) + + # Log thinking token usage if available - only if not stopping + if hasattr(response, 'usage_metadata') and not self._is_stop_requested(): + usage = response.usage_metadata + if supports_thinking and hasattr(usage, 'thoughts_token_count'): + if usage.thoughts_token_count and usage.thoughts_token_count > 0: + print(f" 💭 Thinking tokens used: {usage.thoughts_token_count}") + else: + print(f" ✅ Thinking successfully disabled (0 thinking tokens)") + + # Extract text from the Gemini response - FIXED LOGIC HERE + text_content = "" + + # Try the simple .text property first (most common) + if hasattr(response, 'text'): + try: + text_content = response.text + if text_content: + print(f" ✅ Extracted {len(text_content)} chars from response.text") + except Exception as e: + print(f" ⚠️ Could not access response.text: {e}") + + # If that didn't work or returned empty, try extracting from candidates + if not text_content: + # CRITICAL FIX: Check if candidates exists AND is not None before iterating + if hasattr(response, 'candidates') and response.candidates is not None: + print(f" 🔍 Extracting from candidates...") + try: + for candidate in response.candidates: + if hasattr(candidate, 'content') and candidate.content: + if hasattr(candidate.content, 'parts') and candidate.content.parts: + for part in candidate.content.parts: 
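+                                    # Note: a candidate's content may arrive split across several
+                                    # parts, so collect the text from each part rather than just the first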
+ if hasattr(part, 'text') and part.text: + text_content += part.text + elif hasattr(candidate.content, 'text') and candidate.content.text: + text_content += candidate.content.text + + if text_content: + print(f" ✅ Extracted {len(text_content)} chars from candidates") + except TypeError as e: + print(f" ⚠️ Error iterating candidates: {e}") + print(f" 🔍 Candidates type: {type(response.candidates)}") + else: + print(f" ⚠️ No candidates found in response or candidates is None") + + # Log if we still have no content + if not text_content: + print(f" ⚠️ Warning: No text content extracted from Gemini response") + print(f" 🔍 Response attributes: {list(response.__dict__.keys()) if hasattr(response, '__dict__') else 'No __dict__'}") + + # Return with the actual content populated + return UnifiedResponse( + content=text_content, # Properly populated with the actual response text + finish_reason=finish_reason, + raw_response=response, + error_details=error_details if error_details else None + ) + # ========== END OF API CALL SECTION ========== + + except UnifiedClientError as e: + # Re-raise UnifiedClientErrors (including prohibited content) + # This will trigger main key retry in the outer send() method + raise + + except Exception as e: + print(f"Gemini attempt {attempt+1} failed: {e}") + error_details[f'attempt_{attempt+1}'] = str(e) + + # Check if this is a prohibited content error + error_str = str(e).lower() + if any(indicator in error_str for indicator in [ + "content blocked", "prohibited_content", "blockedreason", + "content_filter", "safety filter", "harmful content" + ]): + # Re-raise as UnifiedClientError with proper type + raise UnifiedClientError( + str(e), + error_type="prohibited_content", + details=error_details + ) + + # Check if this is a rate limit error + if "429" in error_str or "rate limit" in error_str.lower(): + # Re-raise for multi-key handling + raise UnifiedClientError( + f"Rate limit exceeded: {e}", + error_type="rate_limit", + http_status=429 + ) + + # For other errors, we might want to retry + if attempt < attempts - 1: + attempt += 1 + wait_time = min(2 ** attempt, 10) # Exponential backoff with max 10s + print(f"⏳ Retrying Gemini in {wait_time}s...") + time.sleep(wait_time) + continue + else: + # Final attempt failed, re-raise + raise + + # If we exhausted all attempts without success + print(f"❌ All {attempts} Gemini attempts failed") + + # Log the failure + self._log_truncation_failure( + messages=messages, + response_content="", + finish_reason='error', + context=self.context, + error_details={'error': 'all_retries_failed', 'provider': 'gemini', 'attempts': attempts, 'details': error_details} + ) + + # Return error response + return UnifiedResponse( + content="", + finish_reason='error', + raw_response=response, + error_details={'error': 'all_retries_failed', 'attempts': attempts, 'details': error_details} + ) + + def _format_prompt(self, messages, *, style: str) -> str: + """ + Format messages into a single prompt string. + style: + - 'gemini': SYSTEM lines as 'INSTRUCTIONS: ...', others 'ROLE: ...' 
+ - 'ai21': SYSTEM as 'Instructions: ...', USER as 'User: ...', ASSISTANT as 'Assistant: ...', ends with 'Assistant: ' + - 'replicate': simple concatenation of SYSTEM, USER, ASSISTANT with labels, no trailing assistant line + """ + formatted_parts = [] + for msg in messages: + role = (msg.get('role') or 'user').upper() + content = msg.get('content', '') + if style == 'gemini': + if role == 'SYSTEM': + formatted_parts.append(f"INSTRUCTIONS: {content}") + else: + formatted_parts.append(f"{role}: {content}") + elif style in ('ai21', 'replicate'): + if role == 'SYSTEM': + label = 'Instructions' if style == 'ai21' else 'System' + formatted_parts.append(f"{label}: {content}") + elif role == 'USER': + formatted_parts.append(f"User: {content}") + elif role == 'ASSISTANT': + formatted_parts.append(f"Assistant: {content}") + else: + formatted_parts.append(f"{role.title()}: {content}") + else: + formatted_parts.append(str(content)) + prompt = "\n\n".join(formatted_parts) + if style == 'ai21': + prompt += "\nAssistant: " + return prompt + + def _send_anthropic(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Anthropic API""" + max_retries = self._get_max_retries() + + headers = { + "X-API-Key": self.api_key, + "Content-Type": "application/json", + "anthropic-version": "2023-06-01" + } + + # Format messages for Anthropic + system_message = None + formatted_messages = [] + for msg in messages: + if msg['role'] == 'system': + system_message = msg['content'] + else: + formatted_messages.append({ + "role": msg['role'], + "content": msg['content'] + }) + + # Get user-configured anti-duplicate parameters + anti_dupe_params = self._get_anti_duplicate_params(temperature) + data = self._build_anthropic_payload(formatted_messages, temperature, max_tokens, anti_dupe_params, system_message) + + resp = self._http_request_with_retries( + method="POST", + url="https://api.anthropic.com/v1/messages", + headers=headers, + json=data, + expected_status=(200,), + max_retries=max_retries, + provider_name="Anthropic" + ) + json_resp = resp.json() + content, finish_reason, usage = self._parse_anthropic_json(json_resp) + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=json_resp + ) + + def _send_mistral(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Mistral API""" + max_retries = self._get_max_retries() + api_delay = self._get_send_interval() + + if MistralClient and hasattr(self, 'mistral_client'): + # Use SDK if available + def _do(): + chat_messages = [] + for msg in messages: + chat_messages.append(ChatMessage(role=msg['role'], content=msg['content'])) + response = self.mistral_client.chat( + model=self.model, + messages=chat_messages, + temperature=temperature, + max_tokens=max_tokens + ) + content = response.choices[0].message.content if response.choices else "" + finish_reason = response.choices[0].finish_reason if response.choices else 'stop' + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + raw_response=response + ) + return self._with_sdk_retries("Mistral", max_retries, _do) + else: + # Use HTTP API + return self._send_openai_compatible( + messages, temperature, max_tokens, + base_url="https://api.mistral.ai/v1", + response_name=response_name, + provider="mistral" + ) + + def _send_cohere(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Cohere API""" + max_retries = self._get_max_retries() 
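+        # Illustrative shape of the payload assembled below (hypothetical values):
+        #   message      -> the latest user content (the SDK path also prepends any system prompt)
+        #   chat_history -> [{"role": "CHATBOT", "message": "<earlier assistant reply>"}]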
+ api_delay = self._get_send_interval() + + if cohere and hasattr(self, 'cohere_client'): + # Use SDK with standardized retry wrapper + def _do(): + # Format messages for Cohere + chat_history = [] + message = "" + for msg in messages: + if msg['role'] == 'user': + message = msg['content'] + elif msg['role'] == 'assistant': + chat_history.append({"role": "CHATBOT", "message": msg['content']}) + elif msg['role'] == 'system': + message = msg['content'] + "\n\n" + message + response = self.cohere_client.chat( + model=self.model, + message=message, + chat_history=chat_history, + temperature=temperature, + max_tokens=max_tokens + ) + content = response.text + finish_reason = 'stop' + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + raw_response=response + ) + return self._with_sdk_retries("Cohere", max_retries, _do) + else: + # Use HTTP API with retry logic + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + # Format for HTTP API + chat_history = [] + message = "" + + for msg in messages: + if msg['role'] == 'user': + message = msg['content'] + elif msg['role'] == 'assistant': + chat_history.append({"role": "CHATBOT", "message": msg['content']}) + + data = { + "model": self.model, + "message": message, + "chat_history": chat_history, + "temperature": temperature, + "max_tokens": max_tokens + } + + resp = self._http_request_with_retries( + method="POST", + url="https://api.cohere.ai/v1/chat", + headers=headers, + json=data, + expected_status=(200,), + max_retries=max_retries, + provider_name="Cohere" + ) + json_resp = resp.json() + content = json_resp.get("text", "") + return UnifiedResponse( + content=content, + finish_reason='stop', + raw_response=json_resp + ) + + def _send_ai21(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to AI21 API""" + max_retries = self._get_max_retries() + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + # Format messages for AI21 + prompt = self._format_prompt(messages, style='ai21') + + data = { + "prompt": prompt, + "temperature": temperature, + "maxTokens": max_tokens + } + + resp = self._http_request_with_retries( + method="POST", + url=f"https://api.ai21.com/studio/v1/{self.model}/complete", + headers=headers, + json=data, + expected_status=(200,), + max_retries=max_retries, + provider_name="AI21" + ) + json_resp = resp.json() + completions = json_resp.get("completions", []) + content = completions[0].get("data", {}).get("text", "") if completions else "" + return UnifiedResponse( + content=content, + finish_reason='stop', + raw_response=json_resp + ) + + + def _send_replicate(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Replicate API""" + max_retries = self._get_max_retries() + api_delay = self._get_send_interval() + + headers = { + "Authorization": f"Token {self.api_key}", + "Content-Type": "application/json" + } + + # Format messages as single prompt + prompt = self._format_prompt(messages, style='replicate') + + # Replicate uses versioned models + data = { + "version": self.model, # Model should be the version ID + "input": { + "prompt": prompt, + "temperature": temperature, + "max_tokens": max_tokens + } + } + + # Create prediction + resp = self._http_request_with_retries( + method="POST", + url="https://api.replicate.com/v1/predictions", + headers=headers, + json=data, + expected_status=(201,), + max_retries=max_retries, + 
provider_name="Replicate" + ) + prediction = resp.json() + prediction_id = prediction['id'] + + # Poll for result with GUI delay between polls + poll_count = 0 + max_polls = 300 # Maximum 5 minutes at 1 second intervals + + while poll_count < max_polls: + if self._cancelled: + raise UnifiedClientError("Operation cancelled") + + resp = requests.get( + f"https://api.replicate.com/v1/predictions/{prediction_id}", + headers=headers, + timeout=self.request_timeout # Use configured timeout + ) + + if resp.status_code != 200: + raise UnifiedClientError(f"Replicate polling error: {resp.status_code}") + + result = resp.json() + + if result['status'] == 'succeeded': + content = result.get('output', '') + if isinstance(content, list): + content = ''.join(content) + break + elif result['status'] == 'failed': + raise UnifiedClientError(f"Replicate prediction failed: {result.get('error')}") + + # Use GUI delay for polling interval + time.sleep(min(api_delay, 1)) # But at least 1 second + poll_count += 1 + + if poll_count >= max_polls: + raise UnifiedClientError("Replicate prediction timed out") + + return UnifiedResponse( + content=content, + finish_reason='stop', + raw_response=result + ) + + def _send_openai_compatible(self, messages, temperature, max_tokens, base_url, + response_name, provider="generic", headers=None, model_override=None) -> UnifiedResponse: + """Send request to OpenAI-compatible APIs with safety settings""" + max_retries = self._get_max_retries() + api_delay = self._get_send_interval() + + # Determine effective model for this call (do not rely on shared self.model) + if model_override is not None: + effective_model = model_override + else: + # Read instance model under microsecond lock to avoid cross-thread contamination + with self._model_lock: + effective_model = self.model + # Provider-specific model normalization (transport-only) + if provider == 'openrouter': + for prefix in ('or/', 'openrouter/'): + if effective_model.startswith(prefix): + effective_model = effective_model[len(prefix):] + break + effective_model = effective_model.strip() + elif provider == 'fireworks': + if effective_model.startswith('fireworks/'): + effective_model = effective_model[len('fireworks/') :] + if not effective_model.startswith('accounts/'): + effective_model = f"accounts/fireworks/models/{effective_model}" + elif provider == 'chutes': + # Strip the 'chutes/' prefix from the model name if present + if effective_model.startswith('chutes/'): + effective_model = effective_model[7:] # Remove 'chutes/' prefix + + # CUSTOM ENDPOINT OVERRIDE - Check if enabled and override base_url + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + actual_api_key = self.api_key + + # Determine if this is a local endpoint that doesn't need a real API key + is_local_endpoint = False + + # Never override OpenRouter base_url with custom endpoint + if use_custom_endpoint and provider not in ("gemini-openai", "openrouter"): + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + if custom_base_url: + # Check if it's Azure + if '.azure.com' in custom_base_url or '.cognitiveservices' in custom_base_url: + # Azure needs special client + from openai import AzureOpenAI + + deployment = effective_model # Use override or instance model as deployment name + api_version = os.getenv('AZURE_API_VERSION', '2024-12-01-preview') + + # Azure endpoint should be just the base URL + azure_endpoint = custom_base_url.split('/openai')[0] if '/openai' in custom_base_url else custom_base_url + + print(f"🔷 Azure 
endpoint detected") + print(f" Endpoint: {azure_endpoint}") + print(f" Deployment: {deployment}") + print(f" API Version: {api_version}") + + # Create Azure client + for attempt in range(max_retries): + try: + client = AzureOpenAI( + api_key=actual_api_key, + api_version=api_version, + azure_endpoint=azure_endpoint + ) + + # Build params with correct token parameter based on model + params = { + "model": deployment, + "messages": messages, + "temperature": temperature + } + + # Normalize token parameter for Azure endpoint + norm_max_tokens, norm_max_completion_tokens = self._normalize_token_params(max_tokens, None) + if norm_max_completion_tokens is not None: + params["max_completion_tokens"] = norm_max_completion_tokens + elif norm_max_tokens is not None: + params["max_tokens"] = norm_max_tokens + + # Use Idempotency-Key via headers for compatibility + idem_key = self._get_idempotency_key() + response = client.chat.completions.create( + **params, + extra_headers={"Idempotency-Key": idem_key} + ) + + # Extract response + content = response.choices[0].message.content if response.choices else "" + finish_reason = response.choices[0].finish_reason if response.choices else "stop" + + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + raw_response=response + ) + + except Exception as e: + error_str = str(e).lower() + + # Check if this is a content filter error FIRST + if ("content_filter" in error_str or + "responsibleaipolicyviolation" in error_str or + "content management policy" in error_str or + "the response was filtered" in error_str): + + # This is a content filter error - raise it immediately as prohibited_content + print(f"Azure content filter detected: {str(e)[:100]}") + raise UnifiedClientError( + f"Azure content blocked: {e}", + error_type="prohibited_content", + http_status=400, + details={"provider": "azure", "original_error": str(e)} + ) + + # Only retry for non-content-filter errors + if attempt < max_retries - 1: + print(f"Azure error (attempt {attempt + 1}): {e}") + time.sleep(api_delay) + continue + + raise UnifiedClientError(f"Azure error: {e}") + + # Not Azure, continue with regular custom endpoint + base_url = custom_base_url + print(f"🔄 Custom endpoint enabled: Overriding {provider} endpoint") + print(f" Original: {base_url}") + print(f" Override: {custom_base_url}") + + # Check if it's Azure + if '.azure.com' in custom_base_url or '.cognitiveservices' in custom_base_url: + # Azure needs special handling + deployment = self.model # Use model as deployment name + api_version = os.getenv('AZURE_API_VERSION', '2024-08-01-preview') + + # Fix Azure URL format + if '/openai/deployments/' not in custom_base_url: + custom_base_url = f"{custom_base_url.rstrip('/')}/openai/deployments/{deployment}/chat/completions?api-version={api_version}" + + # Azure uses different auth header + if headers is None: + headers = {} + headers['api-key'] = actual_api_key + headers.pop('Authorization', None) # Remove OpenAI auth + + print(f"🔷 Azure endpoint detected: {custom_base_url}") + + base_url = custom_base_url + + # Check if it's a local endpoint + local_indicators = [ + 'localhost', '127.0.0.1', '0.0.0.0', + '192.168.', '10.', '172.16.', '172.17.', '172.18.', '172.19.', + '172.20.', '172.21.', '172.22.', '172.23.', '172.24.', '172.25.', + '172.26.', '172.27.', '172.28.', '172.29.', '172.30.', '172.31.', + ':11434', # Ollama default port + ':8080', # Common local API port + ':5000', # Common local API port + ':8000', # Common local API port + ':1234', # LM Studio 
default port + 'host.docker.internal', # Docker host + ] + + # Also check if user explicitly marked it as local + is_local_llm_env = os.getenv('IS_LOCAL_LLM', '0') == '1' + + is_local_endpoint = is_local_llm_env or any(indicator in custom_base_url.lower() for indicator in local_indicators) + + if is_local_endpoint: + actual_api_key = "dummy-key-for-local-llm" + #print(f" 📍 Detected local endpoint, using dummy API key") + else: + #print(f" ☁️ Using actual API key for cloud endpoint") + pass + + # For all other providers, use the actual API key + # Remove the special case for gemini-openai - it needs the real API key + if not is_local_endpoint: + #print(f" Using actual API key for {provider}") + pass + + # Check if safety settings are disabled via GUI toggle + disable_safety = os.getenv("DISABLE_GEMINI_SAFETY", "false").lower() == "true" + + # Debug logging for ElectronHub + if provider == "electronhub": + logger.debug(f"ElectronHub API call - Messages structure:") + for i, msg in enumerate(messages): + logger.debug(f" Message {i}: role='{msg.get('role')}', content_length={len(msg.get('content', ''))}") + if msg.get('role') == 'system': + logger.debug(f" System prompt preview: {msg.get('content', '')[:100]}...") + + # Use OpenAI SDK for providers known to work well with it + sdk_compatible = ['deepseek', 'together', 'mistral', 'yi', 'qwen', 'moonshot', 'groq', + 'electronhub', 'openrouter', 'fireworks', 'xai', 'gemini-openai', 'chutes'] + + # Allow forcing HTTP-only for OpenRouter via toggle (default: enabled) + openrouter_http_only = os.getenv('OPENROUTER_USE_HTTP_ONLY', '0') == '1' + if provider == 'openrouter' and openrouter_http_only: + print("OpenRouter HTTP-only mode enabled — using direct HTTP client") + + if openai and provider in sdk_compatible and not (provider == 'openrouter' and openrouter_http_only): + # Use OpenAI SDK with custom base URL + for attempt in range(max_retries): + try: + if self._cancelled: + raise UnifiedClientError("Operation cancelled") + + client = self._get_openai_client(base_url=base_url, api_key=actual_api_key) + + # Check if this is Gemini via OpenAI endpoint + is_gemini_endpoint = provider == "gemini-openai" or effective_model.lower().startswith('gemini') + + # Get user-configured anti-duplicate parameters + anti_dupe_params = self._get_anti_duplicate_params(temperature) + + # Enforce fixed temperature for o-series (e.g., GPT-5) to avoid 400s + req_temperature = temperature + try: + if self._is_o_series_model(): + req_temperature = 1.0 + except Exception: + pass + + norm_max_tokens, norm_max_completion_tokens = self._normalize_token_params(max_tokens, None) + # Targeted preflight for OpenRouter free Gemma variant only + try: + if provider == 'openrouter': + ml = (effective_model or '').lower().strip() + if ml == 'google/gemma-3-27b-it:free' and any(isinstance(m, dict) and m.get('role') == 'system' for m in messages): + messages = self._merge_system_into_user(messages) + print("🔁 Preflight: merged system prompt into user for google/gemma-3-27b-it:free (SDK)") + try: + payload_name, _ = self._get_file_names(messages, context=getattr(self, 'context', 'translation')) + self._save_payload(messages, payload_name, retry_reason="preflight_gemma_no_system") + except Exception: + pass + except Exception: + pass + + params = { + "model": effective_model, + "messages": messages, + "temperature": req_temperature, + **anti_dupe_params + } + if norm_max_completion_tokens is not None: + params["max_completion_tokens"] = norm_max_completion_tokens + elif norm_max_tokens is 
not None: + params["max_tokens"] = norm_max_tokens + + # Use extra_body for provider-specific fields the SDK doesn't type-accept + extra_body = {} + + # Inject OpenRouter reasoning configuration (effort/max_tokens) via extra_body + if provider == 'openrouter': + try: + enable_gpt = os.getenv('ENABLE_GPT_THINKING', '0') == '1' + if enable_gpt: + reasoning = {"enabled": True, "exclude": True} + tokens_str = (os.getenv('GPT_REASONING_TOKENS', '') or '').strip() + if tokens_str.isdigit() and int(tokens_str) > 0: + reasoning.pop('effort', None) + reasoning["max_tokens"] = int(tokens_str) + else: + effort = (os.getenv('GPT_EFFORT', 'medium') or 'medium').lower() + if effort not in ('low', 'medium', 'high'): + effort = 'medium' + reasoning.pop('max_tokens', None) + reasoning["effort"] = effort + extra_body["reasoning"] = reasoning + except Exception: + pass + + # Add safety parameters for providers that support them + # Note: Together AI doesn't support the 'moderation' parameter + if disable_safety and provider in ["groq", "fireworks"]: + extra_body["moderation"] = False + logger.info(f"🔓 Safety moderation disabled for {provider}") + elif disable_safety and provider == "together": + # Together AI handles safety differently - no moderation parameter + logger.info(f"🔓 Safety settings note: {provider} doesn't support moderation parameter") + + # Use Idempotency-Key header to avoid unsupported kwarg on some endpoints + idem_key = self._get_idempotency_key() + extra_headers = {"Idempotency-Key": idem_key} + if provider == 'openrouter': + # OpenRouter requires Referer and Title; also request JSON explicitly + extra_headers.update({ + "HTTP-Referer": os.getenv('OPENROUTER_REFERER', 'https://github.com/Shirochi-stack/Glossarion'), + "X-Title": os.getenv('OPENROUTER_APP_NAME', 'Glossarion Translation'), + "X-Proxy-TTL": "0", + "Accept": "application/json", + "Cache-Control": "no-cache", + }) + if os.getenv('OPENROUTER_ACCEPT_IDENTITY', '0') == '1': + extra_headers["Accept-Encoding"] = "identity" + + # Build call kwargs and include extra_body only when present + call_kwargs = { + **params, + "extra_headers": extra_headers, + } + if extra_body: + call_kwargs["extra_body"] = extra_body + + resp = client.chat.completions.create(**call_kwargs) + + # Enhanced extraction for Gemini endpoints + content = None + finish_reason = 'stop' + + # Extract content with Gemini awareness + if hasattr(resp, 'choices') and resp.choices: + choice = resp.choices[0] + + if hasattr(choice, 'finish_reason'): + finish_reason = choice.finish_reason or 'stop' + + if hasattr(choice, 'message'): + message = choice.message + if message is None: + content = "" + if is_gemini_endpoint: + content = "[GEMINI RETURNED NULL MESSAGE]" + finish_reason = 'content_filter' + elif hasattr(message, 'content'): + content = message.content or "" + if content is None and is_gemini_endpoint: + content = "[BLOCKED BY GEMINI SAFETY FILTER]" + finish_reason = 'content_filter' + elif hasattr(message, 'text'): + content = message.text + elif isinstance(message, str): + content = message + else: + content = str(message) if message else "" + elif hasattr(choice, 'text'): + content = choice.text + else: + content = "" + else: + content = "" + + # Normalize finish reasons + if finish_reason in ["max_tokens", "max_length"]: + finish_reason = "length" + + usage = None + if hasattr(resp, 'usage'): + usage = { + 'prompt_tokens': resp.usage.prompt_tokens, + 'completion_tokens': resp.usage.completion_tokens, + 'total_tokens': resp.usage.total_tokens + } + + 
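+                    # Persist the extracted content under response_name before returning
+                    # the normalized UnifiedResponse (usage, when reported, was reduced to
+                    # the prompt/completion/total token counts above).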
self._save_response(content, response_name) + + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=resp + ) + + except Exception as e: + error_str = str(e).lower() + if "rate limit" in error_str or "429" in error_str or "quota" in error_str: + # Preserve the full error message from OpenRouter/ElectronHub + raise UnifiedClientError(str(e), error_type="rate_limit") + # Fallback: If SDK has trouble parsing OpenRouter response, retry via direct HTTP with full diagnostics + if provider == 'openrouter' and ("expecting value" in error_str or "json" in error_str): + try: + print("OpenRouter SDK parse error — falling back to HTTP path for this attempt") + # Save the SDK parse error to failed_requests with traceback + try: + self._save_failed_request(messages, e, getattr(self, 'context', 'general')) + except Exception: + pass + # Build headers + http_headers = self._build_openai_headers(provider, actual_api_key, headers) + http_headers['HTTP-Referer'] = os.getenv('OPENROUTER_REFERER', 'https://github.com/Shirochi-stack/Glossarion') + http_headers['X-Title'] = os.getenv('OPENROUTER_APP_NAME', 'Glossarion Translation') + http_headers['X-Proxy-TTL'] = '0' + http_headers['Accept'] = 'application/json' + http_headers['Cache-Control'] = 'no-cache' + if os.getenv('OPENROUTER_ACCEPT_IDENTITY', '0') == '1': + http_headers['Accept-Encoding'] = 'identity' + # Build body similar to HTTP branch + norm_max_tokens, norm_max_completion_tokens = self._normalize_token_params(max_tokens, None) + body = { + "model": effective_model, + "messages": messages, + "temperature": req_temperature, + } + if norm_max_completion_tokens is not None: + body["max_completion_tokens"] = norm_max_completion_tokens + elif norm_max_tokens is not None: + body["max_tokens"] = norm_max_tokens + # Reasoning (OpenRouter-only) + try: + enable_gpt = os.getenv('ENABLE_GPT_THINKING', '0') == '1' + if enable_gpt: + reasoning = {"enabled": True, "exclude": True} + tokens_str = (os.getenv('GPT_REASONING_TOKENS', '') or '').strip() + if tokens_str.isdigit() and int(tokens_str) > 0: + reasoning["max_tokens"] = int(tokens_str) + else: + effort = (os.getenv('GPT_EFFORT', 'medium') or 'medium').lower() + if effort not in ('low', 'medium', 'high'): + effort = 'medium' + reasoning["effort"] = effort + body["reasoning"] = reasoning + except Exception: + pass + # Make HTTP request + endpoint = "/chat/completions" + http_headers["Idempotency-Key"] = self._get_idempotency_key() + resp = self._http_request_with_retries( + method="POST", + url=f"{base_url}{endpoint}", + headers=http_headers, + json=body, + expected_status=(200,), + max_retries=1, + provider_name="OpenRouter (HTTP)", + use_session=True + ) + json_resp = resp.json() + choices = json_resp.get("choices", []) + if not choices: + raise UnifiedClientError("OpenRouter (HTTP) returned no choices") + content, finish_reason, usage = self._extract_openai_json(json_resp) + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=json_resp + ) + except Exception as http_e: + # Surface detailed diagnostics + raise UnifiedClientError( + f"OpenRouter HTTP fallback failed: {http_e}", + error_type="parse_error" + ) + if not self._multi_key_mode and attempt < max_retries - 1: + print(f"{provider} SDK error (attempt {attempt + 1}): {e}") + time.sleep(api_delay) + continue + elif self._multi_key_mode: + raise UnifiedClientError(f"{provider} error: {e}", error_type="api_error") + raise UnifiedClientError(f"{provider} 
SDK error: {e}") + else: + # Use HTTP API with retry logic + headers = self._build_openai_headers(provider, actual_api_key, headers) + # Provider-specific header tweaks + if provider == 'openrouter': + headers['HTTP-Referer'] = os.getenv('OPENROUTER_REFERER', 'https://github.com/Shirochi-stack/Glossarion') + headers['X-Title'] = os.getenv('OPENROUTER_APP_NAME', 'Glossarion Translation') + headers['X-Proxy-TTL'] = '0' + headers['Cache-Control'] = 'no-cache' + if os.getenv('OPENROUTER_ACCEPT_IDENTITY', '0') == '1': + headers['Accept-Encoding'] = 'identity' + elif provider == 'zhipu': + headers["Authorization"] = f"Bearer {actual_api_key}" + elif provider == 'baidu': + headers["Content-Type"] = "application/json" + # Normalize token parameter (o-series: max_completion_tokens; others: max_tokens) + norm_max_tokens, norm_max_completion_tokens = self._normalize_token_params(max_tokens, None) + + # Enforce fixed temperature for o-series (e.g., GPT-5) to avoid 400s + req_temperature = temperature + try: + if provider == 'openai' and self._is_o_series_model(): + req_temperature = 1.0 + except Exception: + pass + + # Targeted preflight for OpenRouter free Gemma variant only + try: + if provider == 'openrouter': + ml = (effective_model or '').lower().strip() + if ml == 'google/gemma-3-27b-it:free' and any(isinstance(m, dict) and m.get('role') == 'system' for m in messages): + messages = self._merge_system_into_user(messages) + print("🔁 Preflight (HTTP): merged system prompt into user for google/gemma-3-27b-it:free") + try: + payload_name, _ = self._get_file_names(messages, context=getattr(self, 'context', 'translation')) + self._save_payload(messages, payload_name, retry_reason="preflight_gemma_no_system") + except Exception: + pass + except Exception: + pass + + data = { + "model": effective_model, + "messages": messages, + "temperature": req_temperature, + } + if norm_max_completion_tokens is not None: + data["max_completion_tokens"] = norm_max_completion_tokens + elif norm_max_tokens is not None: + data["max_tokens"] = norm_max_tokens + + # Inject OpenRouter reasoning configuration (effort/max_tokens) + if provider == 'openrouter': + try: + enable_gpt = os.getenv('ENABLE_GPT_THINKING', '0') == '1' + if enable_gpt: + reasoning = {"enabled": True, "exclude": True} + tokens_str = (os.getenv('GPT_REASONING_TOKENS', '') or '').strip() + if tokens_str.isdigit() and int(tokens_str) > 0: + reasoning.pop('effort', None) + reasoning["max_tokens"] = int(tokens_str) + else: + effort = (os.getenv('GPT_EFFORT', 'medium') or 'medium').lower() + if effort not in ('low', 'medium', 'high'): + effort = 'medium' + reasoning.pop('max_tokens', None) + reasoning["effort"] = effort + data["reasoning"] = reasoning + except Exception: + pass + + # Add Perplexity-specific options for Sonar models + if provider == 'perplexity' and 'sonar' in effective_model.lower(): + data['search_domain_filter'] = ['perplexity.ai'] + data['return_citations'] = True + data['search_recency_filter'] = 'month' + + # Apply safety flags + self._apply_openai_safety(provider, disable_safety, data, headers) + # Save OpenRouter config if requested + if provider == 'openrouter' and os.getenv("SAVE_PAYLOAD", "1") == "1": + cfg = { + "provider": "openrouter", + "timestamp": datetime.now().isoformat(), + "model": effective_model, + "safety_disabled": disable_safety, + "temperature": temperature, + "max_tokens": max_tokens + } + # Persist reasoning config in saved debug file + try: + enable_gpt = os.getenv('ENABLE_GPT_THINKING', '0') == '1' + if 
enable_gpt: + reasoning = {"enabled": True, "exclude": True} + tokens_str = (os.getenv('GPT_REASONING_TOKENS', '') or '').strip() + if tokens_str.isdigit() and int(tokens_str) > 0: + reasoning.pop('effort', None) + reasoning["max_tokens"] = int(tokens_str) + else: + effort = (os.getenv('GPT_EFFORT', 'medium') or 'medium').lower() + if effort not in ('low', 'medium', 'high'): + effort = 'medium' + reasoning.pop('max_tokens', None) + reasoning["effort"] = effort + cfg["reasoning"] = reasoning + except Exception: + pass + self._save_openrouter_config(cfg, response_name) + # Endpoint and idempotency + endpoint = "/chat/completions" + headers["Idempotency-Key"] = self._get_idempotency_key() + resp = self._http_request_with_retries( + method="POST", + url=f"{base_url}{endpoint}", + headers=headers, + json=data, + expected_status=(200,), + max_retries=max_retries, + provider_name=provider, + use_session=True + ) + # Safely parse JSON with diagnostics for non-JSON bodies + try: + ct = (resp.headers.get('content-type') or '').lower() + if 'application/json' not in ct: + snippet = resp.text[:1200] if hasattr(resp, 'text') else '' + # Log failed request snapshot + try: + self._save_failed_request(messages, f"non-JSON content-type: {ct}", getattr(self, 'context', 'general'), response=snippet) + except Exception: + pass + raise UnifiedClientError( + f"{provider} returned non-JSON content-type: {ct or 'unknown'} | snippet: {snippet}", + error_type="parse_error", + http_status=resp.status_code, + details={"content_type": ct, "snippet": snippet} + ) + json_resp = resp.json() + except Exception as je: + # If this is a JSON decode error, surface a helpful message + import json as _json + if isinstance(je, UnifiedClientError): + raise + try: + # detect common JSON decode exceptions without importing vendor-specific types + if 'Expecting value' in str(je) or 'JSONDecodeError' in str(type(je)): + snippet = resp.text[:1200] if hasattr(resp, 'text') else '' + try: + self._save_failed_request(messages, f"json-parse-failed: {je}", getattr(self, 'context', 'general'), response=snippet) + except Exception: + pass + raise UnifiedClientError( + f"{provider} JSON parse failed: {je} | snippet: {snippet}", + error_type="parse_error", + http_status=resp.status_code, + details={"content_type": ct, "snippet": snippet} + ) + except Exception: + pass + # Re-raise unknown parsing exceptions + raise + + choices = json_resp.get("choices", []) + if not choices: + raise UnifiedClientError(f"{provider} API returned no choices") + content, finish_reason, usage = self._extract_openai_json(json_resp) + # ElectronHub truncation detection + if provider == "electronhub" and content: + if len(content) < 50 and "cannot" in content.lower(): + finish_reason = "content_filter" + print(f"ElectronHub likely refused content: {content[:100]}") + elif finish_reason == "stop": + if self._detect_silent_truncation(content, messages, self.context): + finish_reason = "length" + print("ElectronHub reported 'stop' but content appears truncated") + print(f"🔍 ElectronHub: Detected silent truncation despite 'stop' status") + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=json_resp + ) + + def _send_openai(self, messages, temperature, max_tokens, max_completion_tokens, response_name) -> UnifiedResponse: + """Send request to OpenAI API with proper token parameter handling""" + # CRITICAL: Check if individual endpoint is applied first + if (hasattr(self, '_individual_endpoint_applied') and 
self._individual_endpoint_applied and + hasattr(self, 'openai_client') and self.openai_client): + individual_base_url = getattr(self.openai_client, 'base_url', None) + if individual_base_url: + base_url = str(individual_base_url).rstrip('/') + else: + base_url = 'https://api.openai.com/v1' + else: + # Fallback to global custom endpoint logic + custom_base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', '') + use_custom_endpoint = os.getenv('USE_CUSTOM_OPENAI_ENDPOINT', '0') == '1' + + if custom_base_url and use_custom_endpoint: + base_url = custom_base_url + else: + base_url = 'https://api.openai.com/v1' + + # For OpenAI, we need to handle max_completion_tokens properly + return self._send_openai_compatible( + messages=messages, + temperature=temperature, + max_tokens=max_tokens if not max_completion_tokens else max_completion_tokens, + base_url=base_url, + response_name=response_name, + provider="openai" + ) + + def _send_openai_provider_router(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Generic router for many OpenAI-compatible providers to reduce wrapper duplication.""" + provider = self._get_actual_provider() + + # Provider URL mapping dictionary + provider_urls = { + 'yi': lambda: os.getenv("YI_API_BASE_URL", "https://api.01.ai/v1"), + 'qwen': "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + 'baichuan': "https://api.baichuan-ai.com/v1", + 'zhipu': "https://open.bigmodel.cn/api/paas/v4", + 'moonshot': "https://api.moonshot.cn/v1", + 'groq': lambda: os.getenv("GROQ_API_URL", "https://api.groq.com/openai/v1"), + 'baidu': "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop", + 'tencent': "https://hunyuan.cloud.tencent.com/v1", + 'iflytek': "https://spark-api.xf-yun.com/v1", + 'bytedance': "https://maas-api.vercel.app/v1", + 'minimax': "https://api.minimax.chat/v1", + 'sensenova': "https://api.sensenova.cn/v1", + 'internlm': "https://api.internlm.org/v1", + 'tii': "https://api.tii.ae/v1", + 'microsoft': "https://api.microsoft.com/v1", + 'databricks': lambda: f"{os.getenv('DATABRICKS_API_URL', 'https://YOUR-WORKSPACE.databricks.com')}/serving/endpoints", + 'together': "https://api.together.xyz/v1", + 'openrouter': "https://openrouter.ai/api/v1", + 'fireworks': lambda: os.getenv("FIREWORKS_API_URL", "https://api.fireworks.ai/inference/v1"), + 'xai': lambda: os.getenv("XAI_API_URL", "https://api.x.ai/v1"), + 'deepseek': lambda: os.getenv("DEEPSEEK_API_URL", "https://api.deepseek.com/v1"), + 'perplexity': "https://api.perplexity.ai", + 'chutes': lambda: os.getenv("CHUTES_API_URL", "https://llm.chutes.ai/v1"), + 'salesforce': lambda: os.getenv("SALESFORCE_API_URL", "https://api.salesforce.com/v1"), + 'bigscience': "https://api.together.xyz/v1", # Together AI fallback + 'meta': "https://api.together.xyz/v1" # Together AI fallback + } + + # Get base URL from mapping + url_spec = provider_urls.get(provider) + if url_spec: + base_url = url_spec() if callable(url_spec) else url_spec + else: + # Fallback to base OpenAI-compatible flow if unknown + base_url = os.getenv('OPENAI_CUSTOM_BASE_URL', 'https://api.openai.com/v1') + + return self._send_openai_compatible( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + base_url=base_url, + response_name=response_name, + provider=provider + ) + + def _send_azure(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Azure OpenAI""" + # Prefer per-key (individual) endpoint/version when present, then fall back to env vars + endpoint = 
getattr(self, 'azure_endpoint', None) or \ + getattr(self, 'current_key_azure_endpoint', None) or \ + os.getenv("AZURE_OPENAI_ENDPOINT", "https://YOUR-RESOURCE.openai.azure.com") + api_version = getattr(self, 'azure_api_version', None) or \ + getattr(self, 'current_key_azure_api_version', None) or \ + os.getenv("AZURE_API_VERSION", "2024-02-01") + + if endpoint and not endpoint.startswith(("http://", "https://")): + endpoint = "https://" + endpoint + + headers = { + "api-key": self.api_key, + "Content-Type": "application/json" + } + + # Azure uses a different URL structure + base_url = f"{endpoint.rstrip('/')}/openai/deployments/{self.model}" + url = f"{base_url}/chat/completions?api-version={api_version}" + + data = { + "messages": messages, + "temperature": temperature + } + + # Use _is_o_series_model to determine which token parameter to use + if self._is_o_series_model(): + data["max_completion_tokens"] = max_tokens + else: + data["max_tokens"] = max_tokens + + try: + resp = requests.post( + url, + headers=headers, + json=data, + timeout=self.request_timeout + ) + + if resp.status_code != 200: + # Treat all 400s as prohibited_content to trigger fallback keys cleanly + if resp.status_code == 400: + raise UnifiedClientError( + f"Azure OpenAI error: {resp.status_code} - {resp.text}", + error_type="prohibited_content", + http_status=400 + ) + # Other errors propagate normally with status code + raise UnifiedClientError( + f"Azure OpenAI error: {resp.status_code} - {resp.text}", + http_status=resp.status_code + ) + + json_resp = resp.json() + content, finish_reason, usage = self._extract_openai_json(json_resp) + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=json_resp + ) + + except Exception as e: + print(f"Azure OpenAI error: {e}") + raise UnifiedClientError(f"Azure OpenAI error: {e}") + + def _send_google_palm(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Google PaLM API""" + # PaLM is being replaced by Gemini, but included for completeness + return self._send_gemini(messages, temperature, max_tokens, response_name) + + def _send_alephalpha(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to Aleph Alpha API""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + # Format messages for Aleph Alpha (simple concatenation) + prompt = self._format_prompt(messages, style='replicate') + + data = { + "model": self.model, + "prompt": prompt, + "maximum_tokens": max_tokens, + "temperature": temperature + } + + try: + resp = self._http_request_with_retries( + method="POST", + url="https://api.aleph-alpha.com/complete", + headers=headers, + json=data, + expected_status=(200,), + max_retries=3, + provider_name="AlephAlpha" + ) + json_resp = resp.json() + content = json_resp.get('completions', [{}])[0].get('completion', '') + + return UnifiedResponse( + content=content, + finish_reason='stop', + raw_response=json_resp + ) + + except Exception as e: + print(f"Aleph Alpha error: {e}") + raise UnifiedClientError(f"Aleph Alpha error: {e}") + + def _send_huggingface(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send request to HuggingFace Inference API""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + # Format messages for HuggingFace (simple concatenation) + prompt = self._format_prompt(messages, style='replicate') 
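+        # The Inference API's text-generation task takes a single 'inputs' string with
+        # generation options under 'parameters', and answers with a list of candidates,
+        # e.g. (illustrative): [{"generated_text": "..."}].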
+ + data = { + "inputs": prompt, + "parameters": { + "max_new_tokens": max_tokens, + "temperature": temperature, + "return_full_text": False + } + } + + try: + resp = self._http_request_with_retries( + method="POST", + url=f"https://api-inference.huggingface.co/models/{self.model}", + headers=headers, + json=data, + expected_status=(200,), + max_retries=3, + provider_name="HuggingFace" + ) + json_resp = resp.json() + content = "" + if isinstance(json_resp, list) and json_resp: + content = json_resp[0].get('generated_text', '') + + return UnifiedResponse( + content=content, + finish_reason='stop', + raw_response=json_resp + ) + + except Exception as e: + print(f"HuggingFace error: {e}") + raise UnifiedClientError(f"HuggingFace error: {e}") + + def _send_vertex_model_garden_image(self, messages, image_base64, temperature, max_tokens, response_name): + """Send image request to Vertex AI Model Garden""" + # For now, we can just call the regular send method since Vertex AI + # handles images in the message format + + # Convert image to message format that Vertex AI expects + image_message = { + "role": "user", + "content": [ + {"type": "text", "text": messages[-1]['content'] if messages else ""}, + {"type": "image", "image": {"base64": image_base64}} + ] + } + + # Replace last message with image message + messages_with_image = messages[:-1] + [image_message] + + # Use the regular Vertex AI send method + return self._send_vertex_model_garden(messages_with_image, temperature, max_tokens, response_name=response_name) + + def _is_o_series_model(self) -> bool: + """Check if the current model is an o-series model (o1, o3, o4, etc.) or GPT-5""" + if not self.model: + return False + + model_lower = self.model.lower() + + # Check for specific patterns + if 'o1-preview' in model_lower or 'o1-mini' in model_lower: + return True + + # Check for o3 models + if 'o3-mini' in model_lower or 'o3-pro' in model_lower: + return True + + # Check for o4 models + if 'o4-mini' in model_lower: + return True + + # Check for GPT-5 models (including variants) + if 'gpt-5' in model_lower or 'gpt5' in model_lower: + return True + + # Check if it starts with o followed by a digit + if len(model_lower) >= 2 and model_lower[0] == 'o' and model_lower[1].isdigit(): + return True + + return False + + def _prepare_gemini_image_content(self, messages, image_base64): + """Prepare image content for Gemini API - supports both single and multiple images""" + + # Check if this is a multi-image request (messages contain content arrays) + is_multi_image = False + for msg in messages: + if isinstance(msg.get('content'), list): + for part in msg['content']: + if part.get('type') == 'image_url': + is_multi_image = True + break + + if is_multi_image: + # Handle multi-image format + contents = [] + + for msg in messages: + if msg['role'] == 'system': + contents.append({ + "role": "user", + "parts": [{"text": f"Instructions: {msg['content']}"}] + }) + elif msg['role'] == 'user': + if isinstance(msg['content'], str): + contents.append({ + "role": "user", + "parts": [{"text": msg['content']}] + }) + elif isinstance(msg['content'], list): + parts = [] + for part in msg['content']: + if part['type'] == 'text': + parts.append({"text": part['text']}) + elif part['type'] == 'image_url': + image_data = part['image_url']['url'] + if image_data.startswith('data:'): + base64_data = image_data.split(',')[1] + else: + base64_data = image_data + + mime_type = "image/png" + if 'jpeg' in image_data or 'jpg' in image_data: + mime_type = "image/jpeg" + elif 
'webp' in image_data: + mime_type = "image/webp" + + parts.append({ + "inline_data": { + "mime_type": mime_type, + "data": base64_data + } + }) + + contents.append({ + "role": "user", + "parts": parts + }) + else: + # Handle single image format (backward compatibility) + formatted_parts = [] + for msg in messages: + if msg.get('role') == 'system': + formatted_parts.append(f"Instructions: {msg['content']}") + elif msg.get('role') == 'user': + formatted_parts.append(f"User: {msg['content']}") + + text_prompt = "\n\n".join(formatted_parts) + + contents = [ + { + "role": "user", + "parts": [ + {"text": text_prompt}, + {"inline_data": { + "mime_type": "image/jpeg", + "data": image_base64 + }} + ] + } + ] + + return contents + + # Removed: _send_openai_image + # OpenAI-compatible providers handle images within messages via _get_response and _send_openai_compatible + + def _send_anthropic_image(self, messages, image_base64, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send image request to Anthropic API""" + headers = { + "X-API-Key": self.api_key, + "Content-Type": "application/json", + "anthropic-version": "2023-06-01" + } + + # Format messages with image + system_message = None + formatted_messages = [] + + for msg in messages: + if msg['role'] == 'system': + system_message = msg['content'] + elif msg['role'] == 'user': + # Add image to user message + formatted_messages.append({ + "role": "user", + "content": [ + { + "type": "text", + "text": msg['content'] + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_base64 + } + } + ] + }) + else: + formatted_messages.append({ + "role": msg['role'], + "content": msg['content'] + }) + + data = { + "model": self.model, + "messages": formatted_messages, + "temperature": temperature, + "max_tokens": max_tokens + } + + # Get user-configured anti-duplicate parameters + anti_dupe_params = self._get_anti_duplicate_params(temperature) + data.update(anti_dupe_params) # Add user's custom parameters + + if system_message: + data["system"] = system_message + + try: + resp = self._http_request_with_retries( + method="POST", + url="https://api.anthropic.com/v1/messages", + headers=headers, + json=data, + expected_status=(200,), + max_retries=3, + provider_name="Anthropic Image" + ) + json_resp = resp.json() + content, finish_reason, usage = self._parse_anthropic_json(json_resp) + return UnifiedResponse( + content=content, + finish_reason=finish_reason, + usage=usage, + raw_response=json_resp + ) + + except Exception as e: + print(f"Anthropic Vision API error: {e}") + raise UnifiedClientError(f"Anthropic Vision API error: {e}") + + # Removed: _send_electronhub_image (handled via _send_openai_compatible in _get_response) + + def _send_poe_image(self, messages, image_base64, temperature, max_tokens, response_name) -> UnifiedResponse: + """Send image request using poe-api-wrapper""" + try: + from poe_api_wrapper import PoeApi + except ImportError: + raise UnifiedClientError( + "poe-api-wrapper not installed. Run: pip install poe-api-wrapper" + ) + + # Parse cookies using robust parser + tokens = self._parse_poe_tokens(self.api_key) + if 'p-b' not in tokens or not tokens['p-b']: + raise UnifiedClientError( + "POE tokens missing. 
Provide cookies as 'p-b:VALUE|p-lat:VALUE' or 'p-b=VALUE; p-lat=VALUE'", + error_type="auth_error" + ) + if 'p-lat' not in tokens: + tokens['p-lat'] = '' + logger.info("No p-lat cookie provided; proceeding without it") + + logger.info(f"Tokens being sent for image: p-b={len(tokens.get('p-b', ''))} chars, p-lat={len(tokens.get('p-lat', ''))} chars") + + try: + # Create Poe client (try to pass proxy/headers if supported) + poe_kwargs = {} + ua = os.getenv("POE_USER_AGENT") or os.getenv("HTTP_USER_AGENT") + if ua: + poe_kwargs["headers"] = {"User-Agent": ua, "Referer": "https://poe.com/", "Origin": "https://poe.com"} + proxy = os.getenv("POE_PROXY") or os.getenv("HTTPS_PROXY") or os.getenv("HTTP_PROXY") + if proxy: + poe_kwargs["proxy"] = proxy + try: + poe_client = PoeApi(tokens=tokens, **poe_kwargs) + except TypeError: + poe_client = PoeApi(tokens=tokens) + try: + if ua and hasattr(poe_client, "session") and hasattr(poe_client.session, "headers"): + poe_client.session.headers.update({"User-Agent": ua, "Referer": "https://poe.com/", "Origin": "https://poe.com"}) + except Exception: + pass + + # Get bot name - use vision-capable bots + requested_model = self.model.replace('poe/', '', 1) + bot_map = { + # Vision-capable models + 'gpt-4-vision': 'GPT-4V', + 'gpt-4v': 'GPT-4V', + 'claude-3-opus': 'claude_3_opus', # Claude 3 models support vision + 'claude-3-sonnet': 'claude_3_sonnet', + 'claude-3-haiku': 'claude_3_haiku', + 'gemini-pro-vision': 'gemini_pro_vision', + 'gemini-2.5-flash': 'gemini_1_5_flash', # Gemini 1.5 supports vision + 'gemini-2.5-pro': 'gemini_1_5_pro', + + # Fallback to regular models + 'gpt-4': 'beaver', + 'claude': 'a2', + 'assistant': 'assistant', + } + bot_name = bot_map.get(requested_model.lower(), requested_model) + logger.info(f"Using bot name for vision: {bot_name}") + + # Convert messages to prompt + prompt = self._format_prompt(messages, style='replicate') + + # Note: poe-api-wrapper's image support varies by version + # Some versions support file_path parameter, others need different approaches + full_response = "" + + # POE file_path support is inconsistent; fall back to plain prompt + for chunk in poe_client.send_message(bot_name, prompt): + if 'response' in chunk: + full_response = chunk['response'] + + except Exception as img_error: + print(f"Image handling error: {img_error}") + # Fall back to text-only message + print("Falling back to text-only message due to image error") + for chunk in poe_client.send_message(bot_name, prompt): + if 'response' in chunk: + full_response = chunk['response'] + + # Get the final text + final_text = chunk.get('text', full_response) if 'chunk' in locals() else full_response + + if not final_text: + raise UnifiedClientError( + "POE returned empty response for image. " + "The bot may not support image inputs or the image format is unsupported." + ) + + return UnifiedResponse( + content=final_text, + finish_reason="stop", + raw_response=chunk if 'chunk' in locals() else {"response": full_response} + ) + + except Exception as e: + print(f"Poe image API error details: {str(e)}") + error_str = str(e).lower() + + if self._is_rate_limit_error(e): + raise UnifiedClientError( + "POE rate limit exceeded. Please wait before trying again.", + error_type="rate_limit" + ) + elif "auth" in error_str or "unauthorized" in error_str: + raise UnifiedClientError( + "POE authentication failed. 
Your cookies may be expired.", + error_type="auth_error" + ) + elif "not support" in error_str or "vision" in error_str: + raise UnifiedClientError( + f"The selected POE bot '{requested_model}' may not support image inputs. " + "Try using a vision-capable model like gpt-4-vision or claude-3-opus.", + error_type="capability_error" + ) + + raise UnifiedClientError(f"Poe image API error: {e}") + + def _log_truncation_failure(self, messages, response_content, finish_reason, context=None, attempts=None, error_details=None): + """Log truncation failures for analysis - saves to CSV, TXT, and HTML in truncation_logs subfolder""" + try: + # Use output directory if provided, otherwise current directory + base_dir = self.output_dir if self.output_dir else "." + + # Create truncation_logs subfolder inside the output directory + log_dir = os.path.join(base_dir, "truncation_logs") + os.makedirs(log_dir, exist_ok=True) + + # Generate log filename with date + log_date = datetime.now().strftime("%Y%m") + + # CSV log file (keeping for compatibility) + csv_log_file = os.path.join(log_dir, f"truncation_failures_{log_date}.csv") + + # TXT log file (human-readable format) + txt_log_file = os.path.join(log_dir, f"truncation_failures_{log_date}.txt") + + # HTML log file (web-viewable format) + html_log_file = os.path.join(log_dir, f"truncation_failures_{log_date}.html") + + # Summary file to track truncated outputs + summary_file = os.path.join(log_dir, f"truncation_summary_{log_date}.json") + + # Check if CSV file exists to determine if we need headers + csv_file_exists = os.path.exists(csv_log_file) + + # Extract output filename - UPDATED LOGIC + output_filename = 'unknown' + + # PRIORITY 1: Use the actual output filename if set via set_output_filename() + if hasattr(self, '_actual_output_filename') and self._actual_output_filename: + output_filename = self._actual_output_filename + # PRIORITY 2: Use current output file if available + elif hasattr(self, '_current_output_file') and self._current_output_file: + output_filename = self._current_output_file + # PRIORITY 3: Use tracked response filename from _save_response + elif hasattr(self, '_last_response_filename') and self._last_response_filename: + # Skip if it's a generic Payloads filename + if not self._last_response_filename.startswith(('response_', 'translation_')): + output_filename = self._last_response_filename + + # FALLBACK: Try to extract from context/messages if no filename was set + if output_filename == 'unknown': + if context == 'translation': + # Try to extract chapter/response filename + chapter_match = re.search(r'Chapter (\d+)', str(messages)) + if chapter_match: + chapter_num = chapter_match.group(1) + # Use the standard format that matches book output + safe_title = f"Chapter_{chapter_num}" + output_filename = f"response_{chapter_num.zfill(3)}_{safe_title}.html" + else: + # Try chunk pattern + chunk_match = re.search(r'Chunk (\d+)/(\d+).*Chapter (\d+)', str(messages)) + if chunk_match: + chunk_num = chunk_match.group(1) + chapter_num = chunk_match.group(3) + safe_title = f"Chapter_{chapter_num}" + output_filename = f"response_{chapter_num.zfill(3)}_{safe_title}_chunk_{chunk_num}.html" + elif context == 'image_translation': + # Extract image filename if available + img_match = re.search(r'([\w\-]+\.(jpg|jpeg|png|gif|webp))', str(messages), re.IGNORECASE) + if img_match: + output_filename = f"image_{img_match.group(1)}" + + # Load or create summary tracking + summary_data = {"truncated_files": set(), "total_truncations": 0, "by_type": {}} + 
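+            # Merge with any summary already on disk so counts accumulate across runs;
+            # 'truncated_files' is stored as a sorted list in the JSON and converted
+            # back to a set here for de-duplication.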
if os.path.exists(summary_file): + try: + with open(summary_file, 'r', encoding='utf-8') as f: + loaded_data = json.load(f) + summary_data["truncated_files"] = set(loaded_data.get("truncated_files", [])) + summary_data["total_truncations"] = loaded_data.get("total_truncations", 0) + summary_data["by_type"] = loaded_data.get("by_type", {}) + except: + pass + + # Update summary + summary_data["truncated_files"].add(output_filename) + summary_data["total_truncations"] += 1 + truncation_type_key = f"{finish_reason}_{context or 'unknown'}" + summary_data["by_type"][truncation_type_key] = summary_data["by_type"].get(truncation_type_key, 0) + 1 + + # Save summary + save_summary = { + "truncated_files": sorted(list(summary_data["truncated_files"])), + "total_truncations": summary_data["total_truncations"], + "by_type": summary_data["by_type"], + "last_updated": datetime.now().isoformat() + } + with self._file_write_lock: + with open(summary_file, 'w', encoding='utf-8') as f: + json.dump(save_summary, f, indent=2, ensure_ascii=False) + + # Prepare log entry + # Compute safe input length and serialize error details safely + def _safe_content_len(c): + if isinstance(c, str): + return len(c) + if isinstance(c, list): + total = 0 + for p in c: + if isinstance(p, dict): + if isinstance(p.get('text'), str): + total += len(p['text']) + elif isinstance(p.get('image_url'), dict): + url = p['image_url'].get('url') + if isinstance(url, str): + total += len(url) + elif isinstance(p, str): + total += len(p) + return total + return len(str(c)) if c is not None else 0 + + input_length_value = sum(_safe_content_len(msg.get('content')) for msg in (messages or [])) + + truncation_type_label = 'explicit' if finish_reason == 'length' else 'silent' + + error_details_str = '' + if error_details is not None: + try: + error_details_str = json.dumps(error_details, ensure_ascii=False) + except Exception: + error_details_str = str(error_details) + + log_entry = { + 'timestamp': datetime.now().isoformat(), + 'model': self.model, + 'provider': self.client_type, + 'context': context or 'unknown', + 'finish_reason': finish_reason, + 'attempts': attempts or 1, + 'input_length': input_length_value, + 'output_length': len(response_content) if response_content else 0, + 'truncation_type': truncation_type_label, + 'content_refused': 'yes' if finish_reason == 'content_filter' else 'no', + 'last_50_chars': response_content[-50:] if response_content else '', + 'error_details': error_details_str, + 'input_preview': self._get_safe_preview(messages), + 'output_preview': response_content[:200] if response_content else '', + 'output_filename': output_filename # Add output filename to log entry + } + + # Write to CSV + with self._file_write_lock: + with open(csv_log_file, 'a', newline='', encoding='utf-8') as f: + fieldnames = [ + 'timestamp', 'model', 'provider', 'context', 'finish_reason', + 'attempts', 'input_length', 'output_length', 'truncation_type', + 'content_refused', 'last_50_chars', 'error_details', + 'input_preview', 'output_preview', 'output_filename' + ] + + writer = csv.DictWriter(f, fieldnames=fieldnames) + + # Write header if new file + if not csv_file_exists: + writer.writeheader() + + writer.writerow(log_entry) + + # Write to TXT file with human-readable format + with self._file_write_lock: + with open(txt_log_file, 'a', encoding='utf-8') as f: + f.write(f"\n{'='*80}\n") + f.write(f"TRUNCATION LOG ENTRY - {log_entry['timestamp']}\n") + f.write(f"{'='*80}\n") + f.write(f"Output File: {log_entry['output_filename']}\n") + 
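+                    # The fields below mirror the CSV columns so the two logs stay in sync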
+                    f.write(f"Model: {log_entry['model']}\n")
+                    f.write(f"Provider: {log_entry['provider']}\n")
+                    f.write(f"Context: {log_entry['context']}\n")
+                    f.write(f"Finish Reason: {log_entry['finish_reason']}\n")
+                    f.write(f"Attempts: {log_entry['attempts']}\n")
+                    f.write(f"Input Length: {log_entry['input_length']} chars\n")
+                    f.write(f"Output Length: {log_entry['output_length']} chars\n")
+                    f.write(f"Truncation Type: {log_entry['truncation_type']}\n")
+                    f.write(f"Content Refused: {log_entry['content_refused']}\n")
+
+                    if log_entry['error_details']:
+                        f.write(f"Error Details: {log_entry['error_details']}\n")
+
+                    f.write(f"\n--- Input Preview ---\n")
+                    f.write(f"{log_entry['input_preview']}\n")
+
+                    f.write(f"\n--- Output Preview ---\n")
+                    f.write(f"{log_entry['output_preview']}\n")
+
+                    if log_entry['last_50_chars']:
+                        f.write(f"\n--- Last 50 Characters ---\n")
+                        f.write(f"{log_entry['last_50_chars']}\n")
+
+                    f.write(f"\n{'='*80}\n")
+
+            # Write to HTML file with nice formatting
+            html_file_exists = os.path.exists(html_log_file)
+
+            # Create or update HTML file
+            if not html_file_exists:
+                # Create new HTML file with header
+                # (The inline HTML page template assigned to html_content here, and the
+                # markup appended per entry, are not recoverable from this copy.)
+                html_content = ""
+
+        except Exception as log_error:
+            # Truncation logging must never break the translation flow
+            print(f"Failed to log truncation failure: {log_error}")
+
+    def _send_google_translate(self, messages, temperature, max_tokens, response_name) -> UnifiedResponse:
+        """Send request to Google Cloud Translate API"""
+        # NOTE: the method signature and the helper below are reconstructed from
+        # surviving fragments and may differ from the original in detail.
+        def fix_google_translate_html(html_content):
+            # Rebuild heading/paragraph structure in HTML returned by Google Translate.
+            # Only the tail of the original helper survives; it wrapped paragraphs and
+            # re-attached the heading roughly as:
+            #     formatted_paragraphs = [f'<p>{p.strip()}</p>' for p in paragraphs if p.strip()]
+            #     return f'<{tag}>{title}</{tag}>\n\n' + '\n\n'.join(formatted_paragraphs)
+            return html_content
+
+        try:
+            # Check for Google Cloud credentials with better error messages
+            google_creds_path = None
+
+            # Try multiple possible locations for credentials
+            possible_paths = [
+                os.getenv('GOOGLE_APPLICATION_CREDENTIALS'),
+                os.getenv('GOOGLE_CLOUD_CREDENTIALS'),
+                self.config.get('google_cloud_credentials') if hasattr(self, 'config') else None,
+                self.config.get('google_vision_credentials') if hasattr(self, 'config') else None,
+            ]
+
+            for path in possible_paths:
+                if path and os.path.exists(path):
+                    google_creds_path = path
+                    break
+
+            if not google_creds_path:
+                raise UnifiedClientError(
+                    "Google Cloud credentials not found.\n\n"
+                    "To use Google Translate, you need to:\n"
+                    "1. Create a Google Cloud service account\n"
+                    "2. Download the JSON credentials file\n"
+                    "3. Set it up in Glossarion:\n"
+                    " - For GUI: Use the 'Set up Google Cloud Translate Credentials' button\n"
+                    " - For CLI: Set GOOGLE_APPLICATION_CREDENTIALS environment variable\n\n"
+                    "The same credentials work for both Google Translate and Cloud Vision (manga OCR)."
+                )
+
+            # Set the environment variable for the Google client library
+            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_creds_path
+            logger.info(f"Using Google Cloud credentials: {os.path.basename(google_creds_path)}")
+
+            # Initialize the client
+            translate_client = google_translate.Client()
+
+            # Extract ONLY user content to translate - ignore AI system prompts
+            text_to_translate = ""
+            source_lang = None
+            target_lang = 'en'  # Default to English
+
+            # Extract only user messages, ignore system prompts completely
+            for msg in messages:
+                if msg['role'] == 'user':
+                    text_to_translate = msg['content']
+                    # Simple language detection from content patterns
+                    if any(ord(char) >= 0xAC00 and ord(char) <= 0xD7AF for char in text_to_translate[:100]):
+                        source_lang = 'ko'  # Korean
+                    elif any(ord(char) >= 0x3040 and ord(char) <= 0x309F for char in text_to_translate[:100]) or \
+                         any(ord(char) >= 0x30A0 and ord(char) <= 0x30FF for char in text_to_translate[:100]):
+                        source_lang = 'ja'  # Japanese
+                    elif any(ord(char) >= 0x4E00 and ord(char) <= 0x9FFF for char in text_to_translate[:100]):
+                        source_lang = 'zh'  # Chinese
+                    break  # Take only the first user message
+
+            if not text_to_translate:
+                # Return empty response instead of error
+                return UnifiedResponse(
+                    content="",
+                    finish_reason='complete',
+                    usage={'characters': 0},
+                    raw_response={}
+                )
+
+            # Log the translation request
+            logger.info(f"Google Translate: Translating {len(text_to_translate)} characters")
+            if source_lang:
+                logger.info(f"Google Translate: Source language: {source_lang}")
+
+            # Perform translation
+            start_time = time.time()
+
+            # Google Translate API call - force text format for markdown content
+            # Detect if this is markdown from html2text (starts with #)
+            is_markdown = text_to_translate.strip().startswith('#')
+            translate_format = 'text' if is_markdown else ('html' if '<' in text_to_translate else 'text')
+
+            if source_lang:
+                result = translate_client.translate(
+                    text_to_translate,
+                    source_language=source_lang,
+                    target_language=target_lang,
+                    format_=translate_format
+                )
+            else:
+                # Auto-detect source language
+                result = translate_client.translate(
+                    text_to_translate,
+                    target_language=target_lang,
+                    format_=translate_format
+                )
+
+            elapsed_time = time.time() - start_time
+
+            # Handle both single result and list of results
+            if isinstance(result, list):
+            # Handle both single result and list of results
+            if isinstance(result, list):
+                result = result[0] if result else {}
+
+            translated_text = result.get('translatedText', '')
+            detected_lang = result.get('detectedSourceLanguage', source_lang)
+
+            # FIX: Convert literal \n characters to actual line breaks
+            if '\\n' in translated_text:
+                translated_text = translated_text.replace('\\n', '\n')
+                logger.debug("Converted literal \\n characters to actual line breaks")
+
+            # Also handle other escaped characters that might appear
+            if '\\r' in translated_text:
+                translated_text = translated_text.replace('\\r', '\r')
+            if '\\t' in translated_text:
+                translated_text = translated_text.replace('\\t', '\t')
+
+            import re
+
+            # Fix markdown structure issues in Google Translate text output
+            original_text = translated_text
+
+            if is_markdown and translate_format == 'text':
+                # Google Translate in text mode removes line breaks from markdown
+                # Need to restore proper markdown structure
+
+                # Pattern: "#6. Title Content goes here" -> "#6. Title\n\nContent goes here"
+                markdown_fix = re.match(r'^(#{1,6}[^\n]*?)([A-Z][^#]+)$', translated_text.strip(), re.DOTALL)
+                if markdown_fix:
+                    header_part = markdown_fix.group(1).strip()
+                    content_part = markdown_fix.group(2).strip()
+
+                    # Try to split header from content intelligently
+                    # Look for patterns like "6. Title Name was" -> "6. Title" + "Name was"
+                    title_content_match = re.match(r'^(.*?)([A-Z][a-z]+\s+(?:was|were|had|did|is|are)\s+.*)$', content_part, re.DOTALL)
+                    if title_content_match:
+                        title_end = title_content_match.group(1).strip()
+                        content_start = title_content_match.group(2).strip()
+
+                        # Restore paragraph structure in the content
+                        paragraphs = re.split(r'(?<=[.!?])\s+(?=[A-Z])', content_start)
+                        formatted_content = '\n\n'.join(paragraphs)
+
+                        translated_text = f"{header_part} {title_end}\n\n{formatted_content}"
+                    else:
+                        # Fallback: try to split at reasonable word boundary
+                        words = content_part.split()
+                        if len(words) > 3:
+                            for i in range(2, min(6, len(words) - 2)):
+                                if words[i][0].isupper():
+                                    title_words = ' '.join(words[:i])
+                                    content_words = ' '.join(words[i:])
+
+                                    # Restore paragraph structure in the content
+                                    paragraphs = re.split(r'(?<=[.!?])\s+(?=[A-Z])', content_words)
+                                    formatted_content = '\n\n'.join(paragraphs)
+
+                                    translated_text = f"{header_part} {title_words}\n\n{formatted_content}"
+                                    break
+
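+            # Worked example of the paragraph-splitting heuristic above (illustrative only):
+            #     re.split(r'(?<=[.!?])\s+(?=[A-Z])', 'He left. She stayed! Why?')
+            #     -> ['He left.', 'She stayed!', 'Why?']
+            # i.e. the text is cut after sentence-ending punctuation followed by a capitalized
+            # word, and the pieces are then re-joined with blank lines as paragraphs.
+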
+            if translate_format == 'html':
+                # Apply HTML structure fixes for HTML mode
+                translated_text = fix_google_translate_html(translated_text)
+
+            # Create UnifiedResponse object
+            response = UnifiedResponse(
+                content=translated_text,
+                finish_reason='complete',
+                usage={
+                    'characters': len(text_to_translate),
+                    'detected_source_lang': detected_lang
+                },
+                raw_response=result
+            )
+
+            logger.info(f"Google Translate: Translation completed in {elapsed_time:.2f}s")
+
+            return response
+
+        except UnifiedClientError:
+            # Re-raise our custom errors with helpful messages
+            raise
+        except Exception as e:
+            # Provide more helpful error messages for common issues
+            error_msg = str(e)
+
+            if "403" in error_msg or "permission" in error_msg.lower():
+                raise UnifiedClientError(
+                    "Google Translate API permission denied.\n\n"
+                    "Please ensure:\n"
+                    "1. Cloud Translation API is enabled in your Google Cloud project\n"
+                    "2. Your service account has the 'Cloud Translation API User' role\n"
+                    "3. Billing is enabled for your project (required for Translation API)\n\n"
+                    f"Original error: {error_msg}"
+                )
+            elif "billing" in error_msg.lower():
+                raise UnifiedClientError(
+                    "Google Cloud billing not enabled.\n\n"
+                    "The Translation API requires billing to be enabled on your project.\n"
+                    "Visit: https://console.cloud.google.com/billing\n\n"
+                    f"Original error: {error_msg}"
+                )
+            else:
+                raise UnifiedClientError(f"Google Translate API error: {error_msg}")
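+        # Minimal standalone sketch of the call this path wraps (illustrative; assumes the
+        # google-cloud-translate v2 client and GOOGLE_APPLICATION_CREDENTIALS are already
+        # configured as above, and is not invoked anywhere in this file):
+        #
+        #     from google.cloud import translate_v2 as google_translate
+        #     client = google_translate.Client()
+        #     result = client.translate('안녕하세요', target_language='en', format_='text')
+        #     print(result['translatedText'])              # e.g. 'Hello'
+        #     print(result.get('detectedSourceLanguage'))  # e.g. 'ko'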