davidtran999 committed on
Commit
519b145
·
1 Parent(s): a5fd3d2

Push full code from hue-portal-backend folder

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. Dockerfile +63 -1
  2. backend/.DS_Store +0 -0
  3. backend/API_MODE_FIX.md +82 -0
  4. backend/API_MODE_READY.md +108 -0
  5. backend/CHECK_API_MODE.md +47 -0
  6. backend/DUAL_PATH_RAG_README.md +319 -0
  7. backend/Dockerfile +24 -0
  8. backend/FIX_LOCAL_LLM_ISSUE.md +91 -0
  9. backend/GENERAL_CONVERSATION_FIX.md +130 -0
  10. backend/HF_SPACES_NOT_RECEIVING.md +97 -0
  11. backend/LLM_SWITCH_GUIDE.md +211 -0
  12. backend/OPTIMIZE_CHATBOT_PERFORMANCE.md +642 -0
  13. backend/TEST_API_MODE.md +83 -0
  14. backend/WHY_LLM_NOT_CALLED.md +76 -0
  15. backend/chuyenapichatbot.py +0 -0
  16. backend/docs/API_ENDPOINTS.md +152 -0
  17. backend/docs/INTENT_CLASSIFICATION_IMPROVEMENTS.md +87 -0
  18. backend/docs/LEGAL_REFRESH.md +55 -0
  19. backend/docs/OCR_SETUP.md +56 -0
  20. backend/golden_queries_example.json +68 -0
  21. backend/hue_portal/Procfile +0 -0
  22. backend/hue_portal/chatbot/__init__.py +4 -0
  23. backend/hue_portal/chatbot/advanced_features.py +185 -0
  24. backend/hue_portal/chatbot/analytics.py +194 -0
  25. backend/hue_portal/chatbot/apps.py +7 -0
  26. backend/hue_portal/chatbot/cache_monitor.py +195 -0
  27. backend/hue_portal/chatbot/chatbot.py +1092 -0
  28. backend/hue_portal/chatbot/context_manager.py +220 -0
  29. backend/hue_portal/chatbot/dialogue_manager.py +173 -0
  30. backend/hue_portal/chatbot/document_topics.py +74 -0
  31. backend/hue_portal/chatbot/download_progress.py +294 -0
  32. backend/hue_portal/chatbot/dual_path_router.py +274 -0
  33. backend/hue_portal/chatbot/entity_extraction.py +395 -0
  34. backend/hue_portal/chatbot/exact_match_cache.py +61 -0
  35. backend/hue_portal/chatbot/fast_path_handler.py +59 -0
  36. backend/hue_portal/chatbot/legal_guardrails.py +35 -0
  37. backend/hue_portal/chatbot/llm_integration.py +1746 -0
  38. backend/hue_portal/chatbot/llm_integration.py.backup +372 -0
  39. backend/hue_portal/chatbot/llm_integration.py.bak +877 -0
  40. backend/hue_portal/chatbot/query_expansion.py +228 -0
  41. backend/hue_portal/chatbot/router.py +165 -0
  42. backend/hue_portal/chatbot/schemas/legal_answer.rail +63 -0
  43. backend/hue_portal/chatbot/slow_path_handler.py +1392 -0
  44. backend/hue_portal/chatbot/structured_legal.py +276 -0
  45. backend/hue_portal/chatbot/tests/__init__.py +1 -0
  46. backend/hue_portal/chatbot/tests/__pycache__/test_smoke.cpython-310.pyc +0 -0
  47. backend/hue_portal/chatbot/tests/test_intent_keywords.py +29 -0
  48. backend/hue_portal/chatbot/tests/test_intent_training.py +22 -0
  49. backend/hue_portal/chatbot/tests/test_router.py +41 -0
  50. backend/hue_portal/chatbot/tests/test_smoke.py +29 -0
Dockerfile CHANGED
@@ -54,11 +54,73 @@ fi
  echo "[Docker] Collecting static files..."
  python /app/hue_portal/manage.py collectstatic --noinput || echo "[Docker] Collectstatic failed, continuing..."

+ echo "[Docker] Preloading all models to avoid first-request timeout..."
+ python -c "
+ import os
+ import sys
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'hue_portal.hue_portal.settings')
+ import django
+ django.setup()
+
+ print('[Docker] 🔄 Starting model preload...', flush=True)
+
+ # 1. Preload Embedding Model (BGE-M3)
+ try:
+     print('[Docker] 📦 Preloading embedding model (BGE-M3)...', flush=True)
+     from hue_portal.core.embeddings import get_embedding_model
+     embedding_model = get_embedding_model()
+     if embedding_model:
+         print('[Docker] ✅ Embedding model preloaded successfully', flush=True)
+     else:
+         print('[Docker] ⚠️ Embedding model not loaded', flush=True)
+ except Exception as e:
+     print(f'[Docker] ⚠️ Embedding model preload failed: {e}', flush=True)
+
+ # 2. Preload LLM Model (llama.cpp)
+ llm_provider = os.environ.get('DEFAULT_LLM_PROVIDER') or os.environ.get('LLM_PROVIDER', '')
+ if llm_provider.lower() == 'llama_cpp':
+     try:
+         print('[Docker] 📦 Preloading LLM model (llama.cpp)...', flush=True)
+         from hue_portal.chatbot.llm_integration import get_llm_generator
+         llm_gen = get_llm_generator()
+         if llm_gen and hasattr(llm_gen, 'llama_cpp') and llm_gen.llama_cpp:
+             print('[Docker] ✅ LLM model preloaded successfully', flush=True)
+         else:
+             print('[Docker] ⚠️ LLM model not loaded (may load on first request)', flush=True)
+     except Exception as e:
+         print(f'[Docker] ⚠️ LLM model preload failed: {e} (will load on first request)', flush=True)
+ else:
+     print(f'[Docker] ⏭️ Skipping LLM preload (provider is {llm_provider or \"not set\"}, not llama_cpp)', flush=True)
+
+ # 3. Preload Reranker Model
+ try:
+     print('[Docker] 📦 Preloading reranker model...', flush=True)
+     from hue_portal.core.reranker import get_reranker
+     reranker = get_reranker()
+     if reranker:
+         print('[Docker] ✅ Reranker model preloaded successfully', flush=True)
+     else:
+         print('[Docker] ⚠️ Reranker model not loaded (may load on first request)', flush=True)
+ except Exception as e:
+     print(f'[Docker] ⚠️ Reranker preload failed: {e} (will load on first request)', flush=True)
+
+ print('[Docker] ✅ Model preload completed', flush=True)
+ " || echo "[Docker] ⚠️ Model preload had errors (models will load on first request)"
+
  echo "[Docker] Starting gunicorn..."
- exec gunicorn -b 0.0.0.0:7860 --timeout 1800 --graceful-timeout 1800 --worker-class sync hue_portal.hue_portal.wsgi:application
+ # Reduce tokenizers parallelism warnings and risk of fork deadlocks
+ export TOKENIZERS_PARALLELISM=false
+ # Shorter timeouts to avoid long hangs; adjust if needed
+ cd /app/backend && export PYTHONPATH="/app/backend:${PYTHONPATH}" && exec gunicorn -b 0.0.0.0:7860 --timeout 600 --graceful-timeout 600 --worker-class sync --config python:hue_portal.hue_portal.gunicorn_app hue_portal.hue_portal.gunicorn_app:application
  EOF

  RUN chmod +x /entrypoint.sh

  EXPOSE 7860
  CMD ["/entrypoint.sh"]
+
+ EXPOSE 7860
+ CMD ["/entrypoint.sh"]
+
+ EXPOSE 7860
+ CMD ["/entrypoint.sh"]
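The new gunicorn command loads both its configuration and the WSGI callable from `hue_portal.hue_portal.gunicorn_app` via `--config python:...`. That module is not part of this view; a minimal sketch of what such a combined config/entry module can look like is shown below (file contents, worker count, and the hook body are assumptions, not the repository's actual code):

```python
# hue_portal/hue_portal/gunicorn_app.py -- illustrative sketch only
import os

# Expose the Django WSGI app so "gunicorn_app:application" resolves.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hue_portal.hue_portal.settings")
from hue_portal.hue_portal.wsgi import application  # noqa: E402,F401

# Standard gunicorn settings, picked up because of "--config python:...".
bind = "0.0.0.0:7860"
workers = 1              # assumption: one worker so large models load only once
timeout = 600
graceful_timeout = 600

def post_fork(server, worker):
    # Gunicorn hook: runs in each worker right after fork.
    server.log.info("Worker %s forked; models were preloaded in the entrypoint.", worker.pid)
```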
backend/.DS_Store ADDED
Binary file (6.15 kB).
 
backend/API_MODE_FIX.md ADDED
@@ -0,0 +1,82 @@
+ # API Mode Fix - HF Spaces was not receiving the documents
+
+ ## Problem
+ When the backend called the HF Spaces API, it only sent the plain `query`, not the `prompt` built from the documents. As a result, HF Spaces never received the information from the retrieved documents.
+
+ ## What was fixed
+
+ ### 1. `llm_integration.py` - Line 309
+ **Before:**
+ ```python
+ elif self.provider == LLM_PROVIDER_API:
+     result = self._generate_api(query, context)
+ ```
+
+ **After:**
+ ```python
+ elif self.provider == LLM_PROVIDER_API:
+     # For API mode, send the full prompt (with documents) as the message
+     # This ensures HF Spaces receives all context from retrieved documents
+     result = self._generate_api(prompt, context)
+ ```
+
+ ### 2. `llm_integration.py` - `_generate_api()` method
+ **Before:**
+ ```python
+ def _generate_api(self, query: str, context: Optional[List[Dict[str, Any]]] = None) -> Optional[str]:
+     payload = {
+         "message": query,  # only the plain query is sent
+         "reset_session": False
+     }
+ ```
+
+ **After:**
+ ```python
+ def _generate_api(self, prompt: str, context: Optional[List[Dict[str, Any]]] = None) -> Optional[str]:
+     # Send the full prompt (with documents) as the message to HF Spaces
+     payload = {
+         "message": prompt,  # send the full prompt including documents
+         "reset_session": False
+     }
+ ```
+
+ ### 3. Added detailed logging
+ - Log when the API is called: `[LLM] 🔗 Calling API: ...`
+ - Log the payload: `[LLM] 📤 Payload: ...`
+ - Log the response: `[LLM] 📥 Response status: ...`
+ - Detailed error logging
+
+ ## How to test
+
+ 1. **Restart the backend server:**
+ ```bash
+ pkill -f "manage.py runserver"
+ cd backend && source venv/bin/activate && cd hue_portal
+ python3 manage.py runserver 0.0.0.0:8000
+ ```
+
+ 2. **Test in the UI:**
+    - Open http://localhost:3000
+    - Send the question: "Mức phạt vượt đèn đỏ là bao nhiêu?"
+    - The server logs should show:
+      - `[RAG] Using LLM provider: api`
+      - `[LLM] 🔗 Calling API: ...`
+      - `[LLM] 📥 Response status: 200`
+      - `[LLM] ✅ Got message from API`
+
+ 3. **Check the response:**
+    - The response must come from the LLM (natural text, not a template)
+    - The response must contain information from the retrieved documents
+
+ ## Notes
+
+ - The prompt can be long (it contains documents), but the HF Spaces API supports long prompts
+ - On timeouts, the timeout in `_generate_api()` can be increased (currently 60s)
+ - If it still does not work, check:
+   - whether HF Spaces is running
+   - the internet connection
+   - the server logs for the specific error
+
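The notes above only show `_generate_api()` in fragments. A minimal, self-contained sketch of the call it performs is given below as a standalone function (the endpoint path, the `message` payload/response key, and the 60s timeout come from the notes; everything else, including the function name, is an assumption):

```python
# Illustrative sketch of the API-mode call described above, not the committed code.
import os
from typing import Any, Dict, List, Optional

import requests


def generate_via_hf_spaces(prompt: str, context: Optional[List[Dict[str, Any]]] = None) -> Optional[str]:
    """POST the full RAG prompt (documents included) to the HF Spaces chat endpoint."""
    base_url = os.environ.get("HF_API_BASE_URL", "https://davidtran999-hue-portal-backend.hf.space/api")
    payload = {"message": prompt, "reset_session": False}
    try:
        print(f"[LLM] 🔗 Calling API: {base_url}/chatbot/chat/", flush=True)
        resp = requests.post(f"{base_url}/chatbot/chat/", json=payload, timeout=60)
        print(f"[LLM] 📥 Response status: {resp.status_code}", flush=True)
        if resp.status_code != 200:
            return None
        # The Space is assumed to answer with {"message": "..."} as in the docs above.
        return resp.json().get("message")
    except requests.RequestException as exc:
        print(f"[LLM] ⚠️ API call failed: {exc}", flush=True)
        return None
```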
backend/API_MODE_READY.md ADDED
@@ -0,0 +1,108 @@
+ # API Mode - Readiness status
+
+ ## ✅ The project is ready to test with API mode!
+
+ ### Completed:
+
+ 1. **Code Integration** ✅
+    - `llm_integration.py` already has the `_generate_api()` method
+    - API mode is fully supported
+    - Error handling and timeouts are covered
+
+ 2. **Configuration** ✅
+    - The `.env` file has been created with `LLM_PROVIDER=api`
+    - The API URL is set: `https://davidtran999-hue-portal-backend.hf.space/api`
+
+ 3. **Scripts** ✅
+    - `switch_llm_provider.py` - switch between providers
+    - `test_api_mode.py` - test the API connection
+
+ ### Usage:
+
+ #### 1. Check the current configuration:
+ ```bash
+ python3 switch_llm_provider.py show
+ ```
+
+ #### 2. Make sure API mode is in use:
+ ```bash
+ python3 switch_llm_provider.py api
+ ```
+
+ #### 3. Test the API connection:
+ ```bash
+ python3 test_api_mode.py
+ ```
+
+ #### 4. Restart the Django server:
+ ```bash
+ # If using manage.py
+ python manage.py runserver
+
+ # If using gunicorn
+ systemctl restart gunicorn
+ # or
+ pkill -f gunicorn && gunicorn your_app.wsgi:application
+ ```
+
+ ### Notes:
+
+ 1. **The API endpoint must be running**
+    - The Hugging Face Space must be deployed and running
+    - URL: `https://davidtran999-hue-portal-backend.hf.space/api`
+    - Endpoint: `/api/chatbot/chat/`
+
+ 2. **Model loading time**
+    - The first API call can take a while (the model is loading)
+    - You may get a 503 (Service Unavailable) - this is normal
+    - Wait a few minutes and retry
+
+ 3. **Request format**
+    - The API expects: `{"message": "text", "reset_session": false}`
+    - No `session_id` is needed (it is generated automatically)
+
+ ### Troubleshooting:
+
+ #### API timeout:
+ - Check the internet connection
+ - Check whether the Hugging Face Space is running
+ - Check that the URL is correct
+
+ #### API returns 503:
+ - The model is loading; wait a few minutes and retry
+ - This is normal for the first request
+
+ #### API returns 400:
+ - Check the request format
+ - Make sure the `message` field has a value
+
+ ### Manual test:
+
+ ```python
+ import requests
+
+ url = "https://davidtran999-hue-portal-backend.hf.space/api/chatbot/chat/"
+ payload = {
+     "message": "Xin chào",
+     "reset_session": False
+ }
+
+ response = requests.post(url, json=payload, timeout=60)
+ print(f"Status: {response.status_code}")
+ print(f"Response: {response.json()}")
+ ```
+
+ ### Conclusion:
+
+ **The project is code-ready!**
+
+ You only need to:
+ 1. Make sure the Hugging Face Space is running
+ 2. Restart the Django server
+ 3. Test with a simple question
+
+ The code will automatically:
+ - Call the correct API endpoint
+ - Handle errors
+ - Return the response message
+
backend/CHECK_API_MODE.md ADDED
@@ -0,0 +1,47 @@
+ # Checking API Mode
+
+ ## Problem
+ The current response is template-based, not from the LLM in API mode.
+
+ ## Done so far
+ 1. ✅ Configuration is correct: `LLM_PROVIDER=api`
+ 2. ✅ A direct test of API mode works
+ 3. ✅ Logging added to the RAG pipeline for debugging
+
+ ## How to check
+
+ ### 1. Check the server logs
+ When sending a request, the logs should contain:
+ - `[RAG] Using LLM provider: api`
+ - `[LLM] Generating answer with provider: api`
+ - `[LLM] ✅ Answer generated successfully` or an error
+
+ ### 2. Direct test
+ ```bash
+ curl -X POST http://localhost:8000/api/chatbot/chat/ \
+   -H "Content-Type: application/json" \
+   -d '{"message": "Mức phạt vượt đèn đỏ là bao nhiêu?", "reset_session": false}'
+ ```
+
+ ### 3. Check in the code
+ - The RAG pipeline calls `llm.generate_answer()` with `use_llm=True`
+ - The LLM generator has `provider == "api"`
+ - `_generate_api()` is called with the query
+
+ ## Possible causes
+
+ 1. **API timeout**: the HF Spaces API may time out
+ 2. **API returns None**: the API may return None, falling back to the template
+ 3. **LLM not available**: `get_llm_generator()` may return None
+
+ ## Solution
+
+ If API mode is not working:
+ 1. Check whether the Hugging Face Space is running
+ 2. Check the internet connection
+ 3. Check that the API URL is correct
+ 4. Check the server logs for the specific error
+
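A quick way to confirm which provider the running process actually resolves is from `python manage.py shell`; the snippet below is a sketch based on the attributes referenced above (`get_llm_generator()` and the `provider` field), with the import path assumed from the repository layout:

```python
# Run inside `python manage.py shell` on the backend host.
from hue_portal.chatbot.llm_integration import get_llm_generator

llm = get_llm_generator()
if llm is None:
    print("LLM generator not available -> responses fall back to templates")
else:
    print("Active provider:", llm.provider)  # expected: "api"
```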
backend/DUAL_PATH_RAG_README.md ADDED
@@ -0,0 +1,319 @@
+ # Dual-Path RAG Architecture
+
+ ## Overview
+
+ Dual-Path RAG is an optimized architecture for the legal chatbot that separates processing into two paths:
+
+ - **Fast Path**: Golden dataset (200 common questions) → <200ms, 100% accuracy
+ - **Slow Path**: Full RAG pipeline → 4-8s, 99.99% accuracy
+
+ ## Architecture
+
+ ```
+ User Query
+
+ Intent Classification
+
+ Dual-Path Router
+ ├─ Keyword Router (exact/fuzzy match)
+ ├─ Semantic Similarity Search (threshold 0.85)
+ └─ LLM Router (optional, for edge cases)
+
+ ┌─────────────────┬──────────────────┐
+ │   Fast Path     │   Slow Path      │
+ │   (<200ms)      │   (4-8s)         │
+ │                 │                  │
+ │ Golden Dataset  │ Full RAG:        │
+ │ - Exact match   │ - Hybrid Search  │
+ │ - Fuzzy match   │ - Top 20 docs    │
+ │ - Similarity    │ - LLM Generation │
+ │                 │ - Guardrails     │
+ │ 100% accuracy   │ 99.99% accuracy  │
+ └─────────────────┴──────────────────┘
+
+ Response + Routing Log
+ ```
+
+ ## Components
+
+ ### 1. Database Models
+
+ **GoldenQuery**: Stores verified queries and responses
+ - `query`, `query_normalized`, `query_embedding`
+ - `intent`, `response_message`, `response_data`
+ - `verified_by`, `usage_count`, `accuracy_score`
+
+ **QueryRoutingLog**: Logs routing decisions for monitoring
+ - `route` (fast_path/slow_path)
+ - `router_method` (keyword/similarity/llm/default)
+ - `response_time_ms`, `similarity_score`
+
+ ### 2. Router Components
+
+ **KeywordRouter**: Fast keyword-based matching
+ - Exact match (normalized query)
+ - Fuzzy match (70% word overlap)
+ - ~1-5ms latency
+
+ **DualPathRouter**: Main router with hybrid logic
+ - Step 1: Keyword routing (fastest)
+ - Step 2: Semantic similarity (threshold 0.85)
+ - Step 3: LLM router fallback (optional)
+ - Default: Slow Path
+
+ ### 3. Path Handlers
+
+ **FastPathHandler**: Returns cached responses from golden dataset
+ - Increments usage count
+ - Returns verified response instantly
+
+ **SlowPathHandler**: Full RAG pipeline
+ - Hybrid search (BM25 + vector)
+ - Top 20 documents
+ - LLM generation with structured output
+ - Auto-save high-quality responses to golden dataset
+
+ ## Setup
+
+ ### 1. Run Migration
+
+ ```bash
+ cd backend/hue_portal
+ python manage.py migrate core
+ ```
+
+ ### 2. Import Initial Golden Dataset
+
+ ```bash
+ # Import from JSON file
+ python manage.py manage_golden_dataset import --file golden_queries.json --format json
+
+ # Or import from CSV
+ python manage.py manage_golden_dataset import --file golden_queries.csv --format csv
+ ```
+
+ ### 3. Generate Embeddings (for semantic search)
+
+ ```bash
+ # Generate embeddings for all queries
+ python manage.py manage_golden_dataset update_embeddings
+
+ # Or for specific query
+ python manage.py manage_golden_dataset update_embeddings --query-id 123
+ ```
+
+ ## Management Commands
+
+ ### Import Queries
+
+ ```bash
+ python manage.py manage_golden_dataset import \
+     --file golden_queries.json \
+     --format json \
+     --verify-by legal_expert \
+     --skip-embeddings  # Skip if embeddings will be generated later
+ ```
+
+ ### Verify Query
+
+ ```bash
+ python manage.py manage_golden_dataset verify \
+     --query-id 123 \
+     --verify-by gpt4 \
+     --accuracy 1.0
+ ```
+
+ ### Update Embeddings
+
+ ```bash
+ python manage.py manage_golden_dataset update_embeddings \
+     --batch-size 10
+ ```
+
+ ### View Statistics
+
+ ```bash
+ python manage.py manage_golden_dataset stats
+ ```
+
+ ### Export Dataset
+
+ ```bash
+ python manage.py manage_golden_dataset export \
+     --file exported_queries.json \
+     --active-only
+ ```
+
+ ### Delete Query
+
+ ```bash
+ # Soft delete (deactivate)
+ python manage.py manage_golden_dataset delete --query-id 123 --soft
+
+ # Hard delete
+ python manage.py manage_golden_dataset delete --query-id 123
+ ```
+
+ ## API Endpoints
+
+ ### Chat Endpoint (unchanged)
+
+ ```
+ POST /api/chatbot/chat/
+ {
+   "message": "Mức phạt vượt đèn đỏ là bao nhiêu?",
+   "session_id": "optional-uuid",
+   "reset_session": false
+ }
+ ```
+
+ Response includes routing metadata:
+ ```json
+ {
+   "message": "...",
+   "intent": "search_fine",
+   "results": [...],
+   "_source": "fast_path",  // or "slow_path"
+   "_routing": {
+     "path": "fast_path",
+     "method": "keyword",
+     "confidence": 1.0
+   },
+   "_golden_query_id": 123  // if fast_path
+ }
+ ```
+
+ ### Analytics Endpoint
+
+ ```
+ GET /api/chatbot/analytics/?days=7&type=all
+ ```
+
+ Returns:
+ - `routing`: Fast/Slow path statistics
+ - `golden_dataset`: Golden dataset stats
+ - `performance`: P50/P95/P99 response times
+
+ ## Golden Dataset Format
+
+ ### JSON Format
+
+ ```json
+ [
+   {
+     "query": "Mức phạt vượt đèn đỏ là bao nhiêu?",
+     "intent": "search_fine",
+     "response_message": "Mức phạt vượt đèn đỏ là từ 200.000 - 400.000 VNĐ...",
+     "response_data": {
+       "message": "...",
+       "intent": "search_fine",
+       "results": [...],
+       "count": 1
+     },
+     "verified_by": "legal_expert",
+     "accuracy_score": 1.0
+   }
+ ]
+ ```
+
+ ### CSV Format
+
+ ```csv
+ query,intent,response_message,response_data
+ "Mức phạt vượt đèn đỏ là bao nhiêu?","search_fine","Mức phạt...","{\"message\":\"...\",\"results\":[...]}"
+ ```
+
+ ## Monitoring
+
+ ### Routing Statistics
+
+ ```python
+ from hue_portal.chatbot.analytics import get_routing_stats
+
+ stats = get_routing_stats(days=7)
+ print(f"Fast Path: {stats['fast_path_percentage']:.1f}%")
+ print(f"Slow Path: {stats['slow_path_percentage']:.1f}%")
+ print(f"Fast Path Avg Time: {stats['fast_path_avg_time_ms']:.1f}ms")
+ print(f"Slow Path Avg Time: {stats['slow_path_avg_time_ms']:.1f}ms")
+ ```
+
+ ### Golden Dataset Stats
+
+ ```python
+ from hue_portal.chatbot.analytics import get_golden_dataset_stats
+
+ stats = get_golden_dataset_stats()
+ print(f"Active queries: {stats['active_queries']}")
+ print(f"Embedding coverage: {stats['embedding_coverage']:.1f}%")
+ ```
+
+ ## Best Practices
+
+ ### 1. Building Golden Dataset
+
+ - Start with 50-100 most common queries from logs
+ - Verify each response manually or with strong LLM (GPT-4/Claude)
+ - Add queries gradually based on usage patterns
+ - Target: 200 queries covering 80% of traffic
+
+ ### 2. Verification Process
+
+ - **Weekly review**: Check top 20 most-used queries
+ - **Monthly audit**: Review all queries for accuracy
+ - **Update embeddings**: When adding new queries
+ - **Version control**: Track changes with `version` field
+
+ ### 3. Tuning Similarity Threshold
+
+ - Default: 0.85 (conservative, high precision)
+ - Lower (0.75): More queries go to Fast Path, but risk false matches
+ - Higher (0.90): Fewer false matches, but more queries go to Slow Path
+
+ ### 4. Auto-Save from Slow Path
+
+ Slow Path automatically saves high-quality responses:
+ - Confidence >= 0.95
+ - Has results
+ - Message length > 50 chars
+ - Not already in golden dataset
+
+ Review auto-saved queries weekly and verify before activating.
+
+ ## Troubleshooting
+
+ ### Fast Path not matching
+
+ 1. Check if query is normalized correctly
+ 2. Verify golden query exists: `GoldenQuery.objects.filter(query_normalized=...)`
+ 3. Check similarity threshold (may be too high)
+ 4. Ensure embeddings are generated: `update_embeddings`
+
+ ### Slow performance
+
+ 1. Check routing logs: `QueryRoutingLog.objects.filter(route='fast_path')`
+ 2. Verify Fast Path percentage (should be ~80%)
+ 3. Check embedding model loading time
+ 4. Monitor database query performance
+
+ ### Low accuracy
+
+ 1. Review golden dataset verification
+ 2. Check `accuracy_score` of golden queries
+ 3. Monitor Slow Path responses for quality
+ 4. Update golden queries with better responses
+
+ ## Expected Performance
+
+ - **Fast Path**: <200ms (target: <100ms)
+ - **Slow Path**: 4-8s (full RAG pipeline)
+ - **Overall**: 80% queries <200ms, 20% queries 4-8s
+ - **Cache Hit Rate**: 75-85% (Fast Path usage)
+
+ ## Next Steps
+
+ 1. Import initial 200 common queries
+ 2. Generate embeddings for all queries
+ 3. Monitor routing statistics for 1 week
+ 4. Tune similarity threshold based on metrics
+ 5. Expand golden dataset based on usage patterns
+
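The routing order the README describes (exact keyword match, then fuzzy 70% word overlap, then embedding similarity at 0.85, then Slow Path) can be summarized in a short sketch; the helper names and the `golden` mapping below are illustrative, not the actual `KeywordRouter`/`DualPathRouter` classes:

```python
# Minimal sketch of the dual-path routing decision described above.
from typing import Callable, Dict, Optional, Tuple

SIMILARITY_THRESHOLD = 0.85
FUZZY_OVERLAP = 0.70


def route_query(query: str,
                golden: Dict[str, list],              # normalized golden query -> embedding
                embed_fn: Callable[[str], list],
                similarity_fn: Callable[[list, list], float]) -> Tuple[str, str, Optional[str]]:
    """Return (path, router_method, matched_golden_query)."""
    normalized = " ".join(query.lower().split())

    # 1. Exact match on the normalized query
    if normalized in golden:
        return "fast_path", "keyword", normalized

    # 2. Fuzzy match: 70% word overlap with a golden query
    words = set(normalized.split())
    for candidate in golden:
        cand_words = set(candidate.split())
        if cand_words and len(words & cand_words) / len(cand_words) >= FUZZY_OVERLAP:
            return "fast_path", "keyword", candidate

    # 3. Semantic similarity against golden-query embeddings
    query_vec = embed_fn(normalized)
    best = max(golden, key=lambda c: similarity_fn(query_vec, golden[c]), default=None)
    if best is not None and similarity_fn(query_vec, golden[best]) >= SIMILARITY_THRESHOLD:
        return "fast_path", "similarity", best

    # 4. Default: full RAG pipeline
    return "slow_path", "default", None
```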
backend/Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM python:3.11-slim
+
+ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
+ WORKDIR /app
+
+ # System dependencies (OCR + build essentials)
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends \
+     build-essential \
+     tesseract-ocr \
+     tesseract-ocr-eng \
+     tesseract-ocr-vie \
+     libpoppler-cpp-dev \
+     pkg-config \
+     libgl1 && \
+     rm -rf /var/lib/apt/lists/*
+
+ COPY backend/requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY backend /app
+
+ CMD ["gunicorn", "-b", "0.0.0.0:8000", "hue_portal.hue_portal.wsgi:application"]
+
backend/FIX_LOCAL_LLM_ISSUE.md ADDED
@@ -0,0 +1,91 @@
+ # Fix: Server is using the local LLM instead of API mode
+
+ ## Problem
+ When testing chat on the web, the server runs a local LLM on the machine instead of calling the HF Spaces API.
+
+ ## Root cause
+ 1. **Global instance cache:** `get_llm_generator()` uses a global `_llm_generator` instance that is created only once
+ 2. **Server started with an old env:** if the server started with `LLM_PROVIDER=local`, the instance keeps provider=local
+ 3. **No reload when the env changes:** when `.env` is updated, the server does not recreate the instance automatically
+
+ ## Fixed
+
+ ### File: `backend/hue_portal/chatbot/llm_integration.py`
+
+ **Before:**
+ ```python
+ _llm_generator: Optional[LLMGenerator] = None
+
+ def get_llm_generator() -> Optional[LLMGenerator]:
+     global _llm_generator
+     if _llm_generator is None:
+         _llm_generator = LLMGenerator()
+     return _llm_generator if _llm_generator.is_available() else None
+ ```
+
+ **After:**
+ ```python
+ _llm_generator: Optional[LLMGenerator] = None
+ _last_provider: Optional[str] = None
+
+ def get_llm_generator() -> Optional[LLMGenerator]:
+     """Get or create LLM generator instance.
+
+     Recreates instance if provider changed (e.g., from local to api).
+     """
+     global _llm_generator, _last_provider
+
+     # Get current provider from env
+     current_provider = os.environ.get("LLM_PROVIDER", LLM_PROVIDER_NONE).lower()
+
+     # Recreate if provider changed or instance doesn't exist
+     if _llm_generator is None or _last_provider != current_provider:
+         _llm_generator = LLMGenerator()
+         _last_provider = current_provider
+         print(f"[LLM] 🔄 Recreated LLM generator with provider: {current_provider}", flush=True)
+
+     return _llm_generator if _llm_generator.is_available() else None
+ ```
+
+ ## How to test
+
+ 1. **Make sure `.env` has the right config:**
+ ```bash
+ cd backend
+ cat .env | grep LLM
+ # Should show:
+ # LLM_PROVIDER=api
+ # HF_API_BASE_URL=https://davidtran999-hue-portal-backend.hf.space/api
+ ```
+
+ 2. **Restart the server:**
+ ```bash
+ pkill -f "manage.py runserver"
+ cd backend && source venv/bin/activate && cd hue_portal
+ python3 manage.py runserver 0.0.0.0:8000
+ ```
+
+ 3. **Test in the web UI:**
+    - Open http://localhost:3000/chat
+    - Send the question: "Mức phạt vượt đèn đỏ là bao nhiêu?"
+    - The server logs should show:
+      - `[LLM] 🔄 Recreated LLM generator with provider: api`
+      - `[RAG] Using LLM provider: api`
+      - `[LLM] 🔗 Calling API: https://davidtran999-hue-portal-backend.hf.space/api/chatbot/chat/`
+
+ 4. **Check the response:**
+    - The response must come from the HF Spaces API (natural text, not a template)
+    - There should be NO logs about local model loading
+
+ ## Notes
+
+ - The server recreates the LLM instance automatically when the provider changes
+ - Restarting the server after editing `.env` is not strictly required (but recommended)
+ - If the local LLM is still used, check:
+   - whether `.env` has `LLM_PROVIDER=api`
+   - whether the server loaded the right `.env`
+   - the server logs to see which provider is in use
+
backend/GENERAL_CONVERSATION_FIX.md ADDED
@@ -0,0 +1,130 @@
+ # Fixing the Chatbot to Support General Conversation
+
+ ## Problem
+
+ The chatbot could not answer like a regular AI chatbot because:
+ 1. **The LLM was only called when documents were found** → general queries could not be answered
+ 2. **An error message was returned as soon as no documents were found** → the LLM never got a chance to answer
+
+ ## Applied solution
+
+ ### 1. `rag.py` - Let the LLM answer even when there are no documents
+
+ **File:** `backend/hue_portal/core/rag.py`
+
+ **Change:**
+ - Before: returned an error message as soon as there were no documents
+ - After: the LLM is called even when there are no documents (general conversation mode)
+
+ ```python
+ # Before:
+ if not documents:
+     return error_message  # ← LLM never called
+
+ # After:
+ # Call the LLM first (even without documents)
+ if use_llm:
+     llm_answer = llm.generate_answer(query, context=context, documents=documents if documents else [])
+     if llm_answer:
+         return llm_answer
+
+ # Only return the error when there is no LLM and no documents
+ if not documents:
+     return error_message
+ ```
+
+ ### 2. `llm_integration.py` - Prompt for general conversation
+
+ **File:** `backend/hue_portal/chatbot/llm_integration.py`
+
+ **Change:**
+ - With documents → answer based on the documents (strict mode)
+ - Without documents → allow general conversation (friendly mode)
+
+ ```python
+ if documents:
+     # Strict mode: answer only from the documents
+     prompt_parts.extend([...])
+ else:
+     # General conversation mode
+     prompt_parts.extend([
+         "- Trả lời câu hỏi một cách tự nhiên và hữu ích như một chatbot AI thông thường",
+         "- Nếu câu hỏi liên quan đến pháp luật nhưng không có thông tin, hãy nói rõ",
+         ...
+     ])
+ ```
+
+ ### 3. `rag_pipeline` - Always call generate_answer_template
+
+ **File:** `backend/hue_portal/core/rag.py`
+
+ **Change:**
+ - Before: returned an error as soon as there were no documents
+ - After: always call `generate_answer_template` so the LLM gets a chance to answer
+
+ ```python
+ # Before:
+ if not documents:
+     return {'answer': error_message, ...}  # ← LLM never called
+
+ # After:
+ # Always call generate_answer_template (which calls the LLM if available)
+ answer = generate_answer_template(query, documents, content_type, context=context, use_llm=use_llm)
+ ```
+
+ ### 4. `chatbot.py` - Use the LLM answer even when count=0
+
+ **File:** `backend/hue_portal/chatbot/chatbot.py`
+
+ **Change:**
+ - Before: the RAG result was only used if `count > 0`
+ - After: the LLM answer is used even when `count = 0`
+
+ ```python
+ # Before:
+ if rag_result["count"] > 0 and rag_result["confidence"] >= confidence:
+     # use the answer
+
+ # After:
+ if rag_result.get("answer") and (rag_result["count"] > 0 or rag_result.get("answer", "").strip()):
+     # use the answer (even when count=0)
+ ```
+
+ ## Result
+
+ ✅ **The LLM is called even when there are no documents**
+ - Logs show: `[RAG] Using LLM provider: api`
+ - Logs show: `[LLM] 🔗 Calling API: ...`
+
+ ⚠️ **The API returns a 500 error**
+ - The HF Spaces API may be failing
+ - Or the prompt is too long
+ - Check the HF Spaces logs
+
+ ## How to test
+
+ 1. **Test with a general query:**
+ ```bash
+ curl -X POST http://localhost:8000/api/chatbot/chat/ \
+   -H "Content-Type: application/json" \
+   -d '{"message":"mấy giờ rồi","reset_session":false}'
+ ```
+
+ 2. **Watch the logs:**
+ ```bash
+ tail -f /tmp/django_general_conv.log | grep -E "\[RAG\]|\[LLM\]"
+ ```
+
+ 3. **Check that the LLM is called:**
+    - Look for: `[RAG] Using LLM provider: api`
+    - Look for: `[LLM] 🔗 Calling API: ...`
+
+ ## Notes
+
+ - **API mode needs HF Spaces to be up** → if the API returns 500, check HF Spaces
+ - **Local mode** works better if a GPU is available
+ - **General conversation** only works when an LLM is available
+
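Taken together, the four changes above produce a simple fallback order: LLM first (with or without documents), then the document-based template, then the generic error message. The sketch below shows that ordering only; the function name, the `template_fn` helper, and the fallback wording are illustrative, not the actual code in `rag.py`:

```python
# Sketch of the answer-fallback order described above (illustrative names).
from typing import Any, Callable, Dict, List, Optional


def answer_with_fallback(query: str,
                         documents: List[Any],
                         llm,                      # object with .generate_answer(...) or None
                         template_fn: Callable[[str, List[Any]], str],
                         context: Optional[List[Dict]] = None,
                         use_llm: bool = True) -> str:
    # 1. Give the LLM the first chance, even with an empty document list (general conversation).
    if use_llm and llm is not None:
        llm_answer = llm.generate_answer(query, context=context, documents=documents or [])
        if llm_answer:
            return llm_answer

    # 2. No LLM answer: fall back to the document-based template when documents exist.
    if documents:
        return template_fn(query, documents)

    # 3. Nothing to work with: generic "no information" reply (placeholder wording).
    return "No matching information was found for this question."
```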
backend/HF_SPACES_NOT_RECEIVING.md ADDED
@@ -0,0 +1,97 @@
+ # Issue: HF Spaces is not receiving requests from the local project
+
+ ## Analysis
+
+ From the HF Spaces logs:
+ - HF Spaces is loading a **local model** (Qwen/Qwen2.5-7B-Instruct)
+ - HF Spaces is **NOT** receiving requests from the local project
+ - When the local project calls the API, the response is still **template-based**
+
+ ## Possible causes
+
+ 1. **The LLM is not called when documents are found:**
+    - The RAG pipeline has `use_llm=True` but LLM generation may fail
+    - It falls back to the template when the LLM fails
+
+ 2. **LLM generation fails:**
+    - API timeout
+    - API returns None
+    - Error during generation
+
+ 3. **The local server did not load the right env:**
+    - The server started before `.env` was updated
+    - The server needs a restart
+
+ ## Solution
+
+ ### 1. Make sure the server loads the right env
+ ```bash
+ # Stop the server
+ pkill -f "manage.py runserver"
+
+ # Start again with the new env
+ cd backend && source venv/bin/activate && cd hue_portal
+ python3 manage.py runserver 0.0.0.0:8000
+ ```
+
+ ### 2. Check the logs while testing
+ When sending a request that has documents, the logs should show:
+ - `[RAG] Using LLM provider: api`
+ - `[LLM] 🔗 Calling API: ...`
+ - `[LLM] 📥 Response status: 200`
+
+ If these logs are missing, it means:
+ - the LLM is not being called
+ - or LLM generation fails before the API call
+
+ ### 3. Test API mode directly
+ ```bash
+ cd backend && source venv/bin/activate
+ python3 -c "
+ import os
+ os.environ['LLM_PROVIDER'] = 'api'
+ os.environ['HF_API_BASE_URL'] = 'https://davidtran999-hue-portal-backend.hf.space/api'
+ import sys
+ sys.path.insert(0, 'hue_portal')
+ from chatbot.llm_integration import LLMGenerator, LLM_PROVIDER_API
+ llm = LLMGenerator(provider=LLM_PROVIDER_API)
+ result = llm._generate_api('Test prompt with documents')
+ print(f'Result: {result}')
+ "
+ ```
+
+ ## Debug steps
+
+ 1. **Check the env variables:**
+ ```bash
+ cd backend && cat .env | grep LLM
+ ```
+
+ 2. **Restart the server:**
+ ```bash
+ pkill -f "manage.py runserver"
+ cd backend && source venv/bin/activate && cd hue_portal
+ python3 manage.py runserver 0.0.0.0:8000
+ ```
+
+ 3. **Test with a question that has documents:**
+ ```bash
+ curl -X POST http://localhost:8000/api/chatbot/chat/ \
+   -H "Content-Type: application/json" \
+   -d '{"message": "Mức phạt vượt đèn đỏ là bao nhiêu?", "reset_session": false}'
+ ```
+
+ 4. **Read the server logs:**
+    - Look for `[RAG]` logs
+    - Look for `[LLM]` logs
+    - Look for error messages
+
+ ## Notes
+
+ - The HF Spaces logs show it is using a **local model**, not API mode
+ - This means HF Spaces runs standalone and does not receive requests from the local project
+ - The local project must call the HF Spaces API to get responses from the model running on HF Spaces
+
backend/LLM_SWITCH_GUIDE.md ADDED
@@ -0,0 +1,211 @@
+ # LLM Provider Switching Guide
+
+ > **As of this update, the chatbot defaults to the local model Qwen/Qwen2.5-7B-Instruct (8-bit) if you do not configure `LLM_PROVIDER`.**
+ > You can use the scripts below to switch to API/OpenAI/... at any time.
+
+ Scripts for flexibly switching the LLM provider between the local model, API mode, and other providers.
+
+ ## Usage
+
+ ### Method 1: Python Script (detailed)
+
+ ```bash
+ # Show the current configuration
+ python3 switch_llm_provider.py show
+
+ # Switch to the local model
+ python3 switch_llm_provider.py local
+
+ # Switch to local with a custom model
+ python3 switch_llm_provider.py local --model Qwen/Qwen2.5-14B-Instruct --device cuda --8bit
+
+ # Switch to API mode
+ python3 switch_llm_provider.py api
+
+ # Switch to API with a custom URL
+ python3 switch_llm_provider.py api --url https://custom-api.hf.space/api
+
+ # Switch to OpenAI
+ python3 switch_llm_provider.py openai
+
+ # Switch to Anthropic
+ python3 switch_llm_provider.py anthropic
+
+ # Switch to Ollama
+ python3 switch_llm_provider.py ollama
+
+ # Disable the LLM (template only)
+ python3 switch_llm_provider.py none
+ ```
+
+ ### Method 2: Shell Script (quick)
+
+ ```bash
+ # Show the current configuration
+ ./llm_switch.sh
+
+ # Switch to local
+ ./llm_switch.sh local
+
+ # Switch to API
+ ./llm_switch.sh api
+
+ # Switch to OpenAI
+ ./llm_switch.sh openai
+
+ # Disable the LLM
+ ./llm_switch.sh none
+ ```
+
+ ## Supported providers
+
+ ### 1. Local Model (`local`)
+ Uses a local Hugging Face model on your machine.
+
+ **Configuration:**
+ ```bash
+ LLM_PROVIDER=local
+ LOCAL_MODEL_PATH=Qwen/Qwen2.5-7B-Instruct
+ LOCAL_MODEL_DEVICE=cuda  # or cpu, auto
+ LOCAL_MODEL_8BIT=true    # or false
+ LOCAL_MODEL_4BIT=false   # or true
+ ```
+
+ **Examples:**
+ ```bash
+ # 7B model with 8-bit quantization
+ python3 switch_llm_provider.py local --model Qwen/Qwen2.5-7B-Instruct --device cuda --8bit
+
+ # 14B model with 4-bit quantization
+ python3 switch_llm_provider.py local --model Qwen/Qwen2.5-14B-Instruct --device cuda --4bit
+ ```
+
+ ### 2. API Mode (`api`)
+ Calls the Hugging Face Spaces API.
+
+ **Configuration:**
+ ```bash
+ LLM_PROVIDER=api
+ HF_API_BASE_URL=https://davidtran999-hue-portal-backend.hf.space/api
+ ```
+
+ **Examples:**
+ ```bash
+ # Use the default API URL
+ python3 switch_llm_provider.py api
+
+ # Use a custom API URL
+ python3 switch_llm_provider.py api --url https://your-custom-api.hf.space/api
+ ```
+
+ ### 3. OpenAI (`openai`)
+ Uses the OpenAI API.
+
+ **Configuration:**
+ ```bash
+ LLM_PROVIDER=openai
+ OPENAI_API_KEY=your-api-key-here
+ ```
+
+ **Example:**
+ ```bash
+ python3 switch_llm_provider.py openai
+ ```
+
+ ### 4. Anthropic (`anthropic`)
+ Uses the Anthropic Claude API.
+
+ **Configuration:**
+ ```bash
+ LLM_PROVIDER=anthropic
+ ANTHROPIC_API_KEY=your-api-key-here
+ ```
+
+ **Example:**
+ ```bash
+ python3 switch_llm_provider.py anthropic
+ ```
+
+ ### 5. Ollama (`ollama`)
+ Uses a local Ollama server.
+
+ **Configuration:**
+ ```bash
+ LLM_PROVIDER=ollama
+ OLLAMA_BASE_URL=http://localhost:11434
+ OLLAMA_MODEL=qwen2.5:7b
+ ```
+
+ **Example:**
+ ```bash
+ python3 switch_llm_provider.py ollama
+ ```
+
+ ### 6. None (`none`)
+ Disables the LLM; only template-based generation is used.
+
+ **Example:**
+ ```bash
+ python3 switch_llm_provider.py none
+ ```
+
+ ## Important notes
+
+ 1. **Restart the server**: after changing the provider, restart the Django server to apply it:
+ ```bash
+ # If using manage.py
+ python manage.py runserver
+
+ # If using gunicorn
+ systemctl restart gunicorn
+ # or
+ pkill -f gunicorn && gunicorn ...
+ ```
+
+ 2. **Local model requirements**:
+    - A GPU with enough VRAM (7B 8-bit: ~7GB, 14B 4-bit: ~8GB)
+    - Required packages: `transformers`, `accelerate`, `bitsandbytes`
+    - The model is downloaded automatically on first use
+
+ 3. **API mode**:
+    - Needs an internet connection
+    - The API endpoint must be running
+    - May be rate limited
+
+ 4. **Environment variables**:
+    - The script creates/updates the `.env` file in the `backend/` directory
+    - If there is no `.env` file, the script creates one
+
+ ## Troubleshooting
+
+ ### Local model does not load
+ - Check that the GPU has enough VRAM
+ - Try a smaller model: `Qwen/Qwen2.5-1.5B-Instruct`
+ - Try CPU: `--device cpu` (slower)
+
+ ### API mode does not work
+ - Check the internet connection
+ - Check that the API URL is correct
+ - Check that the API endpoint is running
+
+ ### Script cannot find the .env file
+ - The script creates a new `.env` file automatically
+ - Or create it manually: `touch backend/.env`
+
+ ## Examples
+
+ ### Development: use API mode (fast, no GPU needed)
+ ```bash
+ ./llm_switch.sh api
+ ```
+
+ ### Production: use the local model (best quality, no API cost)
+ ```bash
+ ./llm_switch.sh local --model Qwen/Qwen2.5-7B-Instruct --device cuda --8bit
+ ```
+
+ ### Testing: disable the LLM (template only)
+ ```bash
+ ./llm_switch.sh none
+ ```
+
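The switch scripts themselves are not included in this view. As a rough idea of the mechanism the guide describes, updating provider keys in `backend/.env` can be done with a small helper like the one below (the function, file handling, and main block are assumptions about how such a script might work, not the contents of `switch_llm_provider.py`):

```python
# Illustrative sketch of rewriting backend/.env when switching providers.
from pathlib import Path


def set_env_var(env_path: Path, key: str, value: str) -> None:
    """Create or update KEY=value in a .env file, preserving the other lines."""
    lines = env_path.read_text().splitlines() if env_path.exists() else []
    updated, found = [], False
    for line in lines:
        if line.startswith(f"{key}="):
            updated.append(f"{key}={value}")
            found = True
        else:
            updated.append(line)
    if not found:
        updated.append(f"{key}={value}")
    env_path.write_text("\n".join(updated) + "\n")


if __name__ == "__main__":
    env_file = Path("backend/.env")
    set_env_var(env_file, "LLM_PROVIDER", "api")
    set_env_var(env_file, "HF_API_BASE_URL", "https://davidtran999-hue-portal-backend.hf.space/api")
    print(f"Updated {env_file}; restart the Django server to apply the change.")
```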
backend/OPTIMIZE_CHATBOT_PERFORMANCE.md ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tối ưu Tốc độ và Độ chính xác Chatbot
2
+
3
+ Ngày tạo: 2025-01-27
4
+
5
+ ## 1. Phân tích Bottlenecks hiện tại
6
+
7
+ ### 1.1 Intent Classification
8
+ **Vấn đề:**
9
+ - Loop qua nhiều keywords mỗi lần (fine_keywords: 9 items, fine_single_words: 7 items)
10
+ - Tính `_remove_accents()` nhiều lần cho cùng keyword
11
+ - Không có compiled regex patterns
12
+
13
+ **Impact:** ~5-10ms mỗi query
14
+
15
+ ### 1.2 Search Pipeline
16
+ **Vấn đề:**
17
+ - `list(queryset)` - Load TẤT CẢ objects vào memory trước khi search
18
+ - TF-IDF vectorization cho toàn bộ dataset mỗi lần
19
+ - Không có early exit khi tìm thấy kết quả tốt
20
+ - Query expansion query database mỗi lần
21
+
22
+ **Impact:** ~100-500ms cho dataset lớn
23
+
24
+ ### 1.3 LLM Generation
25
+ **Vấn đề:**
26
+ - Prompt được build lại mỗi lần (không cache)
27
+ - Không có streaming response
28
+ - max_new_tokens=150 (OK) nhưng có thể tối ưu thêm
29
+ - Không cache generated responses
30
+
31
+ **Impact:** ~1-5s cho local model, ~2-10s cho API
32
+
33
+ ### 1.4 Không có Response Caching
34
+ **Vấn đề:**
35
+ - Cùng query được xử lý lại từ đầu
36
+ - Search results không được cache
37
+ - Intent classification không được cache
38
+
39
+ **Impact:** ~100-500ms cho duplicate queries
40
+
41
+ ## 2. Tối ưu Intent Classification
42
+
43
+ ### 2.1 Pre-compile Keyword Patterns
44
+
45
+ ```python
46
+ # backend/hue_portal/core/chatbot.py
47
+
48
+ import re
49
+ from functools import lru_cache
50
+
51
+ class Chatbot:
52
+ def __init__(self):
53
+ self.intent_classifier = None
54
+ self.vectorizer = None
55
+ # Pre-compile keyword patterns
56
+ self._compile_keyword_patterns()
57
+ self._train_classifier()
58
+
59
+ def _compile_keyword_patterns(self):
60
+ """Pre-compile regex patterns for faster matching."""
61
+ # Fine keywords (multi-word first, then single)
62
+ self.fine_patterns_multi = [
63
+ re.compile(r'\b' + re.escape(kw) + r'\b', re.IGNORECASE)
64
+ for kw in ["mức phạt", "vi phạm", "đèn đỏ", "nồng độ cồn",
65
+ "mũ bảo hiểm", "tốc độ", "bằng lái", "vượt đèn"]
66
+ ]
67
+ self.fine_patterns_single = [
68
+ re.compile(r'\b' + re.escape(kw) + r'\b', re.IGNORECASE)
69
+ for kw in ["phạt", "vượt", "đèn", "mức"]
70
+ ]
71
+
72
+ # Pre-compute accent-free versions
73
+ self.fine_keywords_ascii = [self._remove_accents(kw) for kw in
74
+ ["mức phạt", "vi phạm", "đèn đỏ", ...]]
75
+
76
+ # Procedure, Office, Advisory patterns...
77
+ # Similar pattern compilation
78
+
79
+ @lru_cache(maxsize=1000)
80
+ def classify_intent(self, query: str) -> Tuple[str, float]:
81
+ """Cached intent classification."""
82
+ query_lower = query.lower().strip()
83
+
84
+ # Fast path: Check compiled patterns
85
+ for pattern in self.fine_patterns_multi:
86
+ if pattern.search(query_lower):
87
+ return ("search_fine", 0.95)
88
+
89
+ # ... rest of logic
90
+ ```
91
+
92
+ **Lợi ích:**
93
+ - Giảm ~50% thời gian intent classification
94
+ - Cache kết quả cho duplicate queries
95
+
96
+ ### 2.2 Early Exit Strategy
97
+
98
+ ```python
99
+ def _keyword_based_intent(self, query: str) -> Tuple[str, float]:
100
+ query_lower = query.lower().strip()
101
+
102
+ # Fast path: Check most common intents first
103
+ # Fine queries are most common → check first
104
+ if any(pattern.search(query_lower) for pattern in self.fine_patterns_multi):
105
+ return ("search_fine", 0.95)
106
+
107
+ # Early exit for very short queries (likely greeting)
108
+ if len(query.split()) <= 2:
109
+ if any(greeting in query_lower for greeting in ["xin chào", "chào", "hello"]):
110
+ return ("greeting", 0.9)
111
+
112
+ # ... rest
113
+ ```
114
+
115
+ ## 3. Tối ưu Search Pipeline
116
+
117
+ ### 3.1 Limit QuerySet trước khi Load
118
+
119
+ ```python
120
+ # backend/hue_portal/core/search_ml.py
121
+
122
+ def search_with_ml(queryset, query, text_fields, top_k=20, min_score=0.1, use_hybrid=True):
123
+ if not query:
124
+ return queryset[:top_k]
125
+
126
+ # OPTIMIZATION: Limit queryset early for large datasets
127
+ # Only search in first N records if dataset is huge
128
+ MAX_SEARCH_CANDIDATES = 1000
129
+ total_count = queryset.count()
130
+
131
+ if total_count > MAX_SEARCH_CANDIDATES:
132
+ # Use database-level filtering first
133
+ # Try exact match on primary field first
134
+ primary_field = text_fields[0] if text_fields else None
135
+ if primary_field:
136
+ exact_matches = queryset.filter(
137
+ **{f"{primary_field}__icontains": query}
138
+ )[:top_k * 2]
139
+
140
+ if exact_matches.count() >= top_k:
141
+ # We have enough exact matches, return them
142
+ return exact_matches[:top_k]
143
+
144
+ # Limit candidates for ML search
145
+ queryset = queryset[:MAX_SEARCH_CANDIDATES]
146
+
147
+ # Continue with existing search logic...
148
+ ```
149
+
150
+ ### 3.2 Cache Search Results
151
+
152
+ ```python
153
+ # backend/hue_portal/core/search_ml.py
154
+
155
+ from functools import lru_cache
156
+ import hashlib
157
+ import json
158
+
159
+ def _get_query_hash(query: str, model_name: str, text_fields: tuple) -> str:
160
+ """Generate hash for query caching."""
161
+ key = f"{query}|{model_name}|{':'.join(text_fields)}"
162
+ return hashlib.md5(key.encode()).hexdigest()
163
+
164
+ # Cache search results for 1 hour
165
+ @lru_cache(maxsize=500)
166
+ def _cached_search(query_hash: str, queryset_ids: tuple, top_k: int):
167
+ """Cached search results."""
168
+ # This will be called with actual queryset in wrapper
169
+ pass
170
+
171
+ def search_with_ml(queryset, query, text_fields, top_k=20, min_score=0.1, use_hybrid=True):
172
+ # Check cache first
173
+ query_hash = _get_query_hash(query, queryset.model.__name__, tuple(text_fields))
174
+
175
+ # Try to get from cache (if queryset hasn't changed)
176
+ # Note: Full caching requires tracking queryset state
177
+
178
+ # ... existing search logic
179
+ ```
180
+
181
+ ### 3.3 Optimize TF-IDF Calculation
182
+
183
+ ```python
184
+ # Pre-compute TF-IDF vectors for common queries
185
+ # Use incremental TF-IDF instead of recalculating
186
+
187
+ from sklearn.feature_extraction.text import TfidfVectorizer
188
+ import numpy as np
189
+
190
+ class CachedTfidfVectorizer:
191
+ """TF-IDF vectorizer with caching."""
192
+
193
+ def __init__(self):
194
+ self.vectorizer = None
195
+ self.doc_vectors = None
196
+ self.doc_ids = None
197
+
198
+ def fit_transform_cached(self, documents: List[str], doc_ids: List[int]):
199
+ """Fit and cache document vectors."""
200
+ if self.doc_ids == tuple(doc_ids):
201
+ # Same documents, reuse vectors
202
+ return self.doc_vectors
203
+
204
+ # New documents, recompute
205
+ self.vectorizer = TfidfVectorizer(
206
+ analyzer='word',
207
+ ngram_range=(1, 2),
208
+ min_df=1,
209
+ max_df=0.95,
210
+ lowercase=True
211
+ )
212
+ self.doc_vectors = self.vectorizer.fit_transform(documents)
213
+ self.doc_ids = tuple(doc_ids)
214
+ return self.doc_vectors
215
+ ```
216
+
217
+ ### 3.4 Early Exit khi có Exact Match
218
+
219
+ ```python
220
+ def search_with_ml(queryset, query, text_fields, top_k=20, min_score=0.1, use_hybrid=True):
221
+ # OPTIMIZATION: Check exact matches first (fastest)
222
+ query_normalized = normalize_text(query)
223
+
224
+ # Try exact match on primary field
225
+ primary_field = text_fields[0] if text_fields else None
226
+ if primary_field:
227
+ exact_qs = queryset.filter(**{f"{primary_field}__iexact": query})
228
+ if exact_qs.exists():
229
+ # Found exact match, return immediately
230
+ return exact_qs[:top_k]
231
+
232
+ # Try case-insensitive contains (faster than ML)
233
+ contains_qs = queryset.filter(**{f"{primary_field}__icontains": query})
234
+ if contains_qs.count() <= top_k * 2:
235
+ # Small result set, return directly
236
+ return contains_qs[:top_k]
237
+
238
+ # Only use ML search if no good exact matches
239
+ # ... existing ML search logic
240
+ ```
241
+
242
+ ## 4. Tối ưu LLM Generation
243
+
244
+ ### 4.1 Prompt Caching
245
+
246
+ ```python
247
+ # backend/hue_portal/chatbot/llm_integration.py
248
+
249
+ from functools import lru_cache
250
+ import hashlib
251
+
252
+ class LLMGenerator:
253
+ def __init__(self, provider: Optional[str] = None):
254
+ self.provider = provider or LLM_PROVIDER
255
+ self.prompt_cache = {} # Cache prompts by hash
256
+ self.response_cache = {} # Cache responses
257
+
258
+ def _get_prompt_hash(self, query: str, documents: List[Any]) -> str:
259
+ """Generate hash for prompt caching."""
260
+ doc_ids = [getattr(doc, 'id', None) for doc in documents[:5]]
261
+ key = f"{query}|{doc_ids}"
262
+ return hashlib.md5(key.encode()).hexdigest()
263
+
264
+ def generate_answer(self, query: str, context: Optional[List[Dict]], documents: Optional[List[Any]]):
265
+ if not self.is_available():
266
+ return None
267
+
268
+ # Check cache first
269
+ prompt_hash = self._get_prompt_hash(query, documents or [])
270
+ if prompt_hash in self.response_cache:
271
+ cached_response = self.response_cache[prompt_hash]
272
+ # Check if cache is still valid (e.g., < 1 hour old)
273
+ if cached_response.get('timestamp', 0) > time.time() - 3600:
274
+ return cached_response['response']
275
+
276
+ # Build prompt (may be cached)
277
+ prompt = self._build_prompt(query, context, documents)
278
+ response = self._generate_from_prompt(prompt, context=context)
279
+
280
+ # Cache response
281
+ if response:
282
+ self.response_cache[prompt_hash] = {
283
+ 'response': response,
284
+ 'timestamp': time.time()
285
+ }
286
+
287
+ return response
288
+ ```
289
+
290
+ ### 4.2 Optimize Local Model Generation
291
+
292
+ ```python
293
+ def _generate_local(self, prompt: str) -> Optional[str]:
294
+ # OPTIMIZATION: Use faster generation parameters
295
+ with torch.no_grad():
296
+ outputs = self.local_model.generate(
297
+ **inputs,
298
+ max_new_tokens=100, # Reduced from 150
299
+ temperature=0.5, # Lower for faster generation
300
+ top_p=0.8, # Lower top_p
301
+ do_sample=False, # Greedy decoding (faster)
302
+ use_cache=True,
303
+ pad_token_id=self.local_tokenizer.eos_token_id,
304
+ repetition_penalty=1.1,
305
+ # OPTIMIZATION: Early stopping
306
+ eos_token_id=self.local_tokenizer.eos_token_id,
307
+ )
308
+ ```
309
+
310
+ ### 4.3 Streaming Response (for better UX)
311
+
312
+ ```python
313
+ # For API endpoints, support streaming
314
+ def generate_answer_streaming(self, query: str, context, documents):
315
+ """Generate answer with streaming for better UX."""
316
+ if self.provider == LLM_PROVIDER_LOCAL:
317
+ # Use generate with stream=True
318
+ for token in self._generate_local_streaming(prompt):
319
+ yield token
320
+ elif self.provider == LLM_PROVIDER_OPENAI:
321
+ # Use OpenAI streaming API
322
+ for chunk in self.client.chat.completions.create(
323
+ model="gpt-3.5-turbo",
324
+ messages=[{"role": "user", "content": prompt}],
325
+ stream=True
326
+ ):
327
+ yield chunk.choices[0].delta.content
328
+ ```
329
+
330
+ ## 5. Response Caching Strategy
331
+
332
+ ### 5.1 Multi-level Caching
333
+
334
+ ```python
335
+ # backend/hue_portal/core/cache_utils.py
336
+
337
+ from functools import lru_cache
338
+ from django.core.cache import cache
339
+ import hashlib
340
+ import json
341
+
342
+ class ChatbotCache:
343
+ """Multi-level caching for chatbot responses."""
344
+
345
+ CACHE_TIMEOUT = 3600 # 1 hour
346
+
347
+ @staticmethod
348
+ def get_cache_key(query: str, intent: str, session_id: str = None) -> str:
349
+ """Generate cache key."""
350
+ key_parts = [query.lower().strip(), intent]
351
+ if session_id:
352
+ key_parts.append(session_id)
353
+ key_str = "|".join(key_parts)
354
+ return f"chatbot:{hashlib.md5(key_str.encode()).hexdigest()}"
355
+
356
+ @staticmethod
357
+ def get_cached_response(query: str, intent: str, session_id: str = None):
358
+ """Get cached response."""
359
+ cache_key = ChatbotCache.get_cache_key(query, intent, session_id)
360
+ return cache.get(cache_key)
361
+
362
+ @staticmethod
363
+ def set_cached_response(query: str, intent: str, response: dict, session_id: str = None):
364
+ """Cache response."""
365
+ cache_key = ChatbotCache.get_cache_key(query, intent, session_id)
366
+ cache.set(cache_key, response, ChatbotCache.CACHE_TIMEOUT)
367
+
368
+ @staticmethod
369
+ def get_cached_search_results(query: str, model_name: str, text_fields: tuple):
370
+ """Get cached search results."""
371
+ key = f"search:{hashlib.md5(f'{query}|{model_name}|{text_fields}'.encode()).hexdigest()}"
372
+ return cache.get(key)
373
+
374
+ @staticmethod
375
+ def set_cached_search_results(query: str, model_name: str, text_fields: tuple, results):
376
+ """Cache search results."""
377
+ key = f"search:{hashlib.md5(f'{query}|{model_name}|{text_fields}'.encode()).hexdigest()}"
378
+ cache.set(key, results, ChatbotCache.CACHE_TIMEOUT)
379
+ ```
380
+
381
+ ### 5.2 Integrate vào Chatbot
382
+
383
+ ```python
384
+ # backend/hue_portal/core/chatbot.py
385
+
386
+ from .cache_utils import ChatbotCache
387
+
388
+ class Chatbot:
389
+ def generate_response(self, query: str, session_id: str = None) -> Dict[str, Any]:
390
+ query = query.strip()
391
+
392
+ # Classify intent
393
+ intent, confidence = self.classify_intent(query)
394
+
395
+ # Check cache first
396
+ cached_response = ChatbotCache.get_cached_response(query, intent, session_id)
397
+ if cached_response:
398
+ return cached_response
399
+
400
+ # ... existing logic
401
+
402
+ # Cache response before returning
403
+ response = {
404
+ "message": message,
405
+ "intent": intent,
406
+ "confidence": confidence,
407
+ "results": search_result["results"],
408
+ "count": search_result["count"]
409
+ }
410
+
411
+ ChatbotCache.set_cached_response(query, intent, response, session_id)
412
+ return response
413
+ ```
414
+
415
+ ## 6. Optimize Query Expansion
416
+
417
+ ### 6.1 Cache Synonyms
418
+
419
+ ```python
420
+ # backend/hue_portal/core/search_ml.py
421
+
422
+ from functools import lru_cache
+ from django.core.cache import cache
423
+
424
+ @lru_cache(maxsize=1)
425
+ def get_all_synonyms():
426
+ """Get all synonyms (cached)."""
427
+ return list(Synonym.objects.all())
428
+
429
+ def expand_query_with_synonyms(query: str) -> List[str]:
430
+ """Expand query using cached synonyms."""
431
+ query_normalized = normalize_text(query)
432
+ expanded = [query_normalized]
433
+
434
+ # Use cached synonyms
435
+ synonyms = get_all_synonyms()
436
+
437
+ for synonym in synonyms:
438
+ keyword = normalize_text(synonym.keyword)
439
+ alias = normalize_text(synonym.alias)
440
+
441
+ if keyword in query_normalized:
442
+ expanded.append(query_normalized.replace(keyword, alias))
443
+ if alias in query_normalized:
444
+ expanded.append(query_normalized.replace(alias, keyword))
445
+
446
+ return list(set(expanded))
447
+ ```
448
+
449
+ ## 7. Database Query Optimization
450
+
451
+ ### 7.1 Use select_related / prefetch_related
452
+
453
+ ```python
454
+ # backend/hue_portal/core/chatbot.py
455
+
456
+ def search_by_intent(self, intent: str, query: str, limit: int = 5):
457
+ if intent == "search_fine":
458
+ qs = Fine.objects.all().select_related('decree') # If has FK
459
+ # ... rest
460
+
461
+ elif intent == "search_legal":
462
+ qs = LegalSection.objects.all().select_related('document')
463
+ # ... rest
464
+ ```
465
+
466
+ ### 7.2 Add Database Indexes
467
+
468
+ ```python
469
+ # backend/hue_portal/core/models.py
470
+
471
+ class Fine(models.Model):
472
+ name = models.CharField(max_length=500, db_index=True) # Add index
473
+ code = models.CharField(max_length=50, db_index=True) # Add index
474
+
475
+ class Meta:
476
+ indexes = [
477
+ models.Index(fields=['name', 'code']),
478
+ models.Index(fields=['min_fine', 'max_fine']),
479
+ ]
480
+ ```
481
+
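+ The new `db_index` flags and `Meta.indexes` only reach the database after a migration is generated and applied. A hedged sketch of what `makemigrations` might produce (migration module name, dependency, and index name are placeholders, not the project's actual migration):
+
+ ```python
+ # core/migrations/000X_add_fine_indexes.py (sketch)
+ from django.db import migrations, models
+
+
+ class Migration(migrations.Migration):
+     dependencies = [("core", "000W_previous")]  # placeholder dependency
+
+     operations = [
+         migrations.AlterField(
+             model_name="fine",
+             name="name",
+             field=models.CharField(max_length=500, db_index=True),
+         ),
+         migrations.AddIndex(
+             model_name="fine",
+             index=models.Index(fields=["name", "code"], name="fine_name_code_idx"),
+         ),
+     ]
+ ```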
482
+ ## 8. Optimize the Frontend
483
+
484
+ ### 8.1 Debounce Search Input
485
+
486
+ ```typescript
487
+ // frontend/src/pages/Chat.tsx
488
+
489
+ const [input, setInput] = useState('')
490
+ const debouncedInput = useDebounce(input, 300) // Wait 300ms
491
+
492
+ useEffect(() => {
493
+ if (debouncedInput) {
494
+ // Trigger search suggestions
495
+ }
496
+ }, [debouncedInput])
497
+ ```
498
+
499
+ ### 8.2 Optimistic UI Updates
500
+
501
+ ```typescript
502
+ const handleSend = async (messageText?: string) => {
503
+ // Show message immediately (optimistic)
504
+ setMessages(prev => [...prev, {
505
+ role: 'user',
506
+ content: textToSend,
507
+ timestamp: new Date()
508
+ }])
509
+
510
+ // Then fetch response
511
+ const response = await chat(textToSend, sessionId)
512
+ // Update with actual response
513
+ }
514
+ ```
515
+
516
+ ## 9. Monitoring & Metrics
517
+
518
+ ### 9.1 Add Performance Logging
519
+
520
+ ```python
521
+ # backend/hue_portal/chatbot/views.py
522
+
523
+ import time
524
+ import logging
+ from django.utils import timezone
+
+ logger = logging.getLogger(__name__)
525
+
526
+ @api_view(["POST"])
527
+ def chat(request: Request) -> Response:
528
+ start_time = time.time()
529
+
530
+ # ... existing logic
531
+
532
+ # Log performance metrics
533
+ elapsed = time.time() - start_time
534
+ logger.info(f"[PERF] Chat response time: {elapsed:.3f}s | Intent: {intent} | Results: {count}")
535
+
536
+ # Track slow queries
537
+ if elapsed > 2.0:
538
+ logger.warning(f"[SLOW] Query took {elapsed:.3f}s: {message[:100]}")
539
+
540
+ return Response(response)
541
+ ```
542
+
543
+ ### 9.2 Track Cache Hit Rate
544
+
545
+ ```python
546
+ class ChatbotCache:
547
+ cache_hits = 0
548
+ cache_misses = 0
549
+
550
+ @staticmethod
551
+ def get_cached_response(query: str, intent: str, session_id: str = None):
552
+ cached = cache.get(ChatbotCache.get_cache_key(query, intent, session_id))
553
+ if cached:
554
+ ChatbotCache.cache_hits += 1
555
+ return cached
556
+ ChatbotCache.cache_misses += 1
557
+ return None
558
+
559
+ @staticmethod
560
+ def get_cache_stats():
561
+ total = ChatbotCache.cache_hits + ChatbotCache.cache_misses
562
+ if total == 0:
563
+ return {"hit_rate": 0, "hits": 0, "misses": 0}
564
+ return {
565
+ "hit_rate": ChatbotCache.cache_hits / total,
566
+ "hits": ChatbotCache.cache_hits,
567
+ "misses": ChatbotCache.cache_misses
568
+ }
569
+ ```
570
+
571
+ ## 10. Expected Performance Improvements
572
+
573
+ | Optimization | Current | Optimized | Improvement |
574
+ |-------------|---------|-----------|-------------|
575
+ | Intent Classification | 5-10ms | 1-3ms | **70% faster** |
576
+ | Search (small dataset) | 50-100ms | 10-30ms | **70% faster** |
577
+ | Search (large dataset) | 200-500ms | 50-150ms | **70% faster** |
578
+ | LLM Generation (cached) | 1-5s | 0.01-0.1s | **99% faster** |
579
+ | LLM Generation (uncached) | 1-5s | 0.8-4s | **20% faster** |
580
+ | Total Response (cached) | 100-500ms | 10-50ms | **90% faster** |
581
+ | Total Response (uncached) | 1-6s | 0.5-3s | **50% faster** |
582
+
583
+ ## 11. Implementation Priority
584
+
585
+ ### Phase 1: Quick Wins (1-2 days)
586
+ 1. ✅ Add response caching (Django cache)
587
+ 2. ✅ Pre-compile keyword patterns (see the sketch after this list)
588
+ 3. ✅ Cache synonyms
589
+ 4. ✅ Add database indexes
590
+ 5. ✅ Early exit for exact matches
591
+
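+ A minimal sketch of item 2, with an `lru_cache` wrapper that also gives repeated queries a cheap early exit; the keyword sets and function name are illustrative, not the project's actual classifier:
+
+ ```python
+ import re
+ from functools import lru_cache
+ from typing import Optional
+
+ # Compiled once at import time instead of on every request (illustrative keyword sets)
+ INTENT_PATTERNS = {
+     "search_fine": re.compile(r"mức phạt|xử phạt|phạt bao nhiêu", re.IGNORECASE),
+     "search_procedure": re.compile(r"thủ tục|hồ sơ|đăng ký", re.IGNORECASE),
+ }
+
+
+ @lru_cache(maxsize=1024)
+ def quick_intent(query: str) -> Optional[str]:
+     """Return an intent immediately when a pre-compiled pattern matches, else defer to the ML model."""
+     normalized = query.lower().strip()
+     for intent, pattern in INTENT_PATTERNS.items():
+         if pattern.search(normalized):
+             return intent
+     return None  # caller falls back to the full classifier
+ ```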
592
+ ### Phase 2: Medium Impact (3-5 days)
593
+ 1. ✅ Limit QuerySet before loading
594
+ 2. ✅ Optimize TF-IDF calculation
595
+ 3. ✅ Prompt caching for LLM
596
+ 4. ✅ Optimize local model generation
597
+ 5. ✅ Add performance logging
598
+
599
+ ### Phase 3: Advanced (1-2 weeks)
600
+ 1. ✅ Streaming responses
601
+ 2. ✅ Incremental TF-IDF
602
+ 3. ✅ Advanced caching strategies
603
+ 4. ✅ Query result pre-computation
604
+
605
+ ## 12. Testing Performance
606
+
607
+ ```python
608
+ # backend/scripts/benchmark_chatbot.py
609
+
610
+ import time
611
+ import statistics
+
+ from hue_portal.core.chatbot import get_chatbot
612
+
613
+ def benchmark_chatbot():
614
+ chatbot = get_chatbot()
615
+ test_queries = [
616
+ "Mức phạt vượt đèn đỏ là bao nhiêu?",
617
+ "Thủ tục đăng ký cư trú cần gì?",
618
+ "Địa chỉ công an phường ở đâu?",
619
+ # ... more queries
620
+ ]
621
+
622
+ times = []
623
+ for query in test_queries:
624
+ start = time.time()
625
+ response = chatbot.generate_response(query)
626
+ elapsed = time.time() - start
627
+ times.append(elapsed)
628
+ print(f"Query: {query[:50]}... | Time: {elapsed:.3f}s")
629
+
630
+ print(f"\nAverage: {statistics.mean(times):.3f}s")
631
+ print(f"Median: {statistics.median(times):.3f}s")
632
+ print(f"P95: {statistics.quantiles(times, n=20)[18]:.3f}s")
633
+ ```
634
+
635
+ ## Conclusion
+
+ With the optimizations above, the chatbot will be:
+ - **50-90% faster** for cached queries
+ - **20-70% faster** for uncached queries
+ - **More accurate** thanks to early exit and exact matching
+ - **More scalable** thanks to database indexes and query limiting
642
+
backend/TEST_API_MODE.md ADDED
@@ -0,0 +1,83 @@
1
+ # API Mode Test Guide
+
+ ## Current issues
+ - HF Spaces is not receiving requests from the local project
+ - Responses are still template-based (not generated by the LLM)
+
+ ## Already fixed
+ 1. ✅ API mode now sends the `prompt` (with documents) instead of only the `query`
+ 2. ✅ Added detailed logging: `[LLM] 🔗 Calling API`, `[RAG] Using LLM provider`
+
+ ## How to test
+
+ ### 1. Fix the database error (if needed)
+ ```bash
+ # Check whether PostgreSQL is running
+ psql -h localhost -p 5543 -U hue -d hue_portal
+
+ # Or temporarily use SQLite (edit settings.py)
+ ```
+
+ ### 2. Start the server with the correct env
+ ```bash
+ cd /Users/davidtran/Downloads/TryHarDemNayProject/backend
+ source venv/bin/activate
+ cd hue_portal
+
+ # Check the env
+ cat ../.env | grep LLM
+
+ # Start the server
+ python3 manage.py runserver 0.0.0.0:8000
+ ```
+
+ ### 3. Test API mode
+ ```bash
+ # Test with a question that has matching documents
+ curl -X POST http://localhost:8000/api/chatbot/chat/ \
+ -H "Content-Type: application/json" \
+ -d '{"message": "Mức phạt vượt đèn đỏ là bao nhiêu?", "reset_session": false}'
+ ```
+
+ ### 4. Check the server logs
+ Look for the following log lines:
+ - `[RAG] Using LLM provider: api` - the LLM is being called
+ - `[LLM] 🔗 Calling API: https://davidtran999-hue-portal-backend.hf.space/api/chatbot/chat/` - HF Spaces is being called
+ - `[LLM] 📥 Response status: 200` - HF Spaces returned a response
+ - `[LLM] ✅ Got message from API` - a message was received from the API
+
+ If these log lines do NOT appear:
+ - The LLM was not called (check `use_llm=True`)
+ - LLM generation failed (check the error logs)
+ - The LLM is not available (check `get_llm_generator()`)
+
+ ## Debug checklist
+
+ - [ ] The server starts successfully (no database error)
+ - [ ] `.env` contains `LLM_PROVIDER=api` and `HF_API_BASE_URL=...`
+ - [ ] The server loads the right env (restart after editing `.env`)
+ - [ ] Test with a question that has matching documents (not a greeting)
+ - [ ] Check the server logs for `[LLM]` and `[RAG]` entries
+ - [ ] Check whether HF Spaces is up and running
+
+ ## If it still does not work
+
+ 1. **Check whether the LLM is called:**
+    - Look for the log `[RAG] Using LLM provider: api`
+    - If it is missing, check `use_llm=True` in `rag_pipeline()`
+
+ 2. **Check the API call:**
+    - Look for the log `[LLM] 🔗 Calling API: ...`
+    - If it is missing, check whether `_generate_api()` is being called
+
+ 3. **Check the response:**
+    - Look for the log `[LLM] 📥 Response status: ...`
+    - If it is 200, check the response content
+    - If it is an error, read the error message
+
+ 4. **Test the API directly:**
+ ```bash
+ curl -X POST https://davidtran999-hue-portal-backend.hf.space/api/chatbot/chat/ \
+ -H "Content-Type: application/json" \
+ -d '{"message": "Test", "reset_session": false}'
+ ```
backend/WHY_LLM_NOT_CALLED.md ADDED
@@ -0,0 +1,76 @@
1
+ # Why isn't the LLM being called?
+
+ ## Problem
+
+ The chatbot does answer, but the response is **template-based** (it does not come from the LLM API).
+
+ ## Root cause
+
+ ### 1. No documents were found
+ - The response shows: `"count": 0`, `"results": []`
+ - The database has no tables yet, or no data
+
+ ### 2. The LLM is only called when documents ARE found
+
+ In `rag.py`:
+ ```python
+ # Try LLM generation first if enabled and documents are available
+ if use_llm and documents: # ← documents are required
+ llm = get_llm_generator()
+ if llm:
+ llm_answer = llm.generate_answer(...)
+ ```
+
+ **Logic:**
+ - If there are **NO documents** → return a template message immediately
+ - If documents **ARE found** → call the LLM to generate the answer
+
+ ## Solution
+
+ ### 1. Run migrations to create the tables
+ ```bash
+ cd backend && source venv/bin/activate && cd hue_portal
+ python3 manage.py makemigrations
+ python3 manage.py migrate
+ ```
+
+ ### 2. Import/ingest data into the database
+ - Data is needed for fines, procedures, legal sections, etc.
+ - Once data exists, search will find documents
+ - When documents are found, the LLM will be called
+
+ ### 3. Test with a question that has matching documents
+ - If the database already has data, test with a question that is definitely covered by the DB
+ - Example: "Mức phạt vượt đèn đỏ" (if fine data is present)
+
+ ## How the flow works
+
+ 1. **User sends a question** → `chatbot/views.py`
+ 2. **Intent classification** → determine the type of question
+ 3. **RAG pipeline** → look for documents in the database
+    - If there are **NO documents** → return a template message
+    - If documents **ARE found** → call the LLM to generate the answer
+ 4. **LLM generation** (only when documents exist):
+    - `get_llm_generator()` → get the LLM instance
+    - `llm.generate_answer(query, documents=documents)` → generate
+    - In API mode: call the HF Spaces API with the prompt (including documents)
+ 5. **Response** → returned to the user (see the sketch below)
+
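+ A compressed sketch of this flow; only `get_llm_generator()` and `generate_answer()` come from the snippet above, while `build_template_answer()` and the exact return shape are illustrative:
+
+ ```python
+ def rag_pipeline(query, documents, use_llm=True):
+     """Sketch: the LLM is only reached when retrieval actually found documents."""
+     if not documents:
+         # No documents -> template answer, the LLM is never called
+         return {"message": "No matching documents found.", "results": [], "count": 0}
+     if use_llm:
+         llm = get_llm_generator()
+         if llm:
+             answer = llm.generate_answer(query, documents=documents)
+             return {"message": answer, "results": documents, "count": len(documents)}
+     # LLM disabled or unavailable -> template built from the retrieved documents
+     return {"message": build_template_answer(documents), "results": documents, "count": len(documents)}
+ ```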
+ ## To test API mode
+
+ 1. **Make sure the database has data**
+ 2. **Send a question that has matching documents** (for example: "Mức phạt vượt đèn đỏ")
+ 3. **Check the server logs** for:
+    - `[RAG] Using LLM provider: api`
+    - `[LLM] 🔗 Calling API: ...`
+    - `[LLM] 📥 Response status: 200`
+
+ ## Notes
+
+ - **API mode is already configured correctly** (`LLM_PROVIDER=api`)
+ - **The code has been fixed to send the prompt (with documents)** instead of only the query
+ - **The current problem:** the database has no data yet → no documents → the LLM is not called
+
backend/chuyenapichatbot.py CHANGED
File without changes
backend/docs/API_ENDPOINTS.md ADDED
@@ -0,0 +1,152 @@
1
+ # Chatbot API Endpoints
2
+
3
+ ## Overview
4
+
5
+ This document describes the chatbot API endpoints available in the system.
6
+
7
+ ## Base URL
8
+
9
+ - Default: `http://localhost:8000`
10
+ - Override via env when running test scripts:
11
+ ```bash
12
+ export API_BASE_URL=http://localhost:8090 # e.g. when runserver uses port 8090
13
+ ```
14
+
15
+ ## Endpoints
16
+
17
+ ### 1. Health Check
18
+
19
+ **Endpoint**: `GET /api/chatbot/health/`
20
+
21
+ **Description**: Check the health status of the chatbot service.
22
+
23
+ **Response**:
24
+ ```json
25
+ {
26
+ "status": "healthy",
27
+ "service": "chatbot",
28
+ "classifier_loaded": true
29
+ }
30
+ ```
31
+
32
+ **Example**:
33
+ ```bash
34
+ curl http://localhost:8000/api/chatbot/health/
35
+ ```
36
+
37
+ ### 2. Chat
38
+
39
+ **Endpoint**: `POST /api/chat/`
40
+
41
+ **Description**: Send a message to the chatbot and get a response.
42
+
43
+ **Request Body**:
44
+ ```json
45
+ {
46
+ "message": "Làm thủ tục cư trú cần gì?"
47
+ }
48
+ ```
49
+
50
+ **Response**:
51
+ ```json
52
+ {
53
+ "message": "Tôi tìm thấy 5 thủ tục liên quan đến 'Làm thủ tục cư trú cần gì?':\n\n1. Đăng ký thường trú\n ...",
54
+ "intent": "search_procedure",
55
+ "confidence": 0.95,
56
+ "results": [
57
+ {
58
+ "type": "procedure",
59
+ "data": {
60
+ "id": 1,
61
+ "title": "Đăng ký thường trú",
62
+ "domain": "Cư trú",
63
+ ...
64
+ }
65
+ }
66
+ ],
67
+ "count": 5
68
+ }
69
+ ```
70
+
71
+ **Example**:
72
+ ```bash
73
+ curl -X POST http://localhost:8000/api/chat/ \
74
+ -H "Content-Type: application/json" \
75
+ -d '{"message": "Làm thủ tục cư trú cần gì?"}'
76
+ ```
77
+
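+ The same request from Python, as a small sketch (assumes the `requests` package; the response fields follow the example above):
+
+ ```python
+ import requests
+
+ resp = requests.post(
+     "http://localhost:8000/api/chat/",
+     json={"message": "Làm thủ tục cư trú cần gì?"},
+     timeout=30,
+ )
+ resp.raise_for_status()
+ data = resp.json()
+ print(data["intent"], data["confidence"], data["count"])
+ print(data["message"])
+ ```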
78
+ ## Intent Types
79
+
80
+ The chatbot can classify queries into the following intents:
81
+
82
+ - `search_fine`: Search for traffic fines
83
+ - `search_procedure`: Search for administrative procedures
84
+ - `search_office`: Search for office/unit information
85
+ - `search_advisory`: Search for security advisories
86
+ - `general_query`: General queries
87
+ - `greeting`: Greetings
88
+
89
+ ## Response Fields
90
+
91
+ - `message`: The response message to display to the user
92
+ - `intent`: The classified intent
93
+ - `confidence`: Confidence score (0.0 to 1.0)
94
+ - `results`: Array of search results
95
+ - `count`: Number of results found
96
+
97
+ ## Error Handling
98
+
99
+ ### 400 Bad Request
100
+
101
+ ```json
102
+ {
103
+ "error": "message is required"
104
+ }
105
+ ```
106
+
107
+ ### 500 Internal Server Error
108
+
109
+ ```json
110
+ {
111
+ "message": "Xin lỗi, có lỗi xảy ra. Vui lòng thử lại.",
112
+ "intent": "error",
113
+ "error": "Error details",
114
+ "results": [],
115
+ "count": 0
116
+ }
117
+ ```
118
+
119
+ ## Testing
120
+
121
+ Use the provided test script:
122
+
123
+ ```bash
124
+ cd backend
125
+ API_BASE_URL=http://localhost:8090 \
126
+ POSTGRES_HOST=localhost POSTGRES_PORT=5433 \
127
+ python scripts/test_api_endpoint.py
128
+ ```
129
+
130
+ The script automatically:
131
+ - Hits `GET /api/chatbot/health/` to confirm classifier loading.
132
+ - Sends six representative queries and reports status, intent, confidence, latency, and first result title.
133
+
134
+ ## API Endpoint Testing & Fixes — 2025-11-14
135
+
136
+ - Added trailing slashes to `backend/hue_portal/chatbot/urls.py` and `backend/hue_portal/core/urls.py` so `/api/chatbot/health/` and `/api/chat/` resolve correctly.
137
+ - Hardened chatbot serialization via `_serialize_document` to avoid `TypeError: Object of type type is not JSON serializable`.
138
+ - Latest test run:
139
+ - Command: `API_BASE_URL=http://localhost:8090 POSTGRES_HOST=localhost POSTGRES_PORT=5433 python scripts/test_api_endpoint.py`
140
+ - Result: **6/6** successful queries, **100 % intent accuracy**, avg latency **~3.7 s** (first call includes SentenceTransformer warm-up).
141
+ - Checklist before running tests:
142
+ 1. `POSTGRES_HOST=localhost POSTGRES_PORT=5433 ../../.venv/bin/python manage.py runserver 0.0.0.0:8090`
143
+ 2. Ensure `API_BASE_URL` matches runserver port.
144
+ 3. (Optional) export `DJANGO_DEBUG=1` for verbose stack traces during local debugging.
145
+
146
+ ## Notes
147
+
148
+ - The API uses RAG (Retrieval-Augmented Generation) pipeline for generating responses
149
+ - Hybrid search (BM25 + Vector similarity) is used for retrieval
150
+ - Intent classification uses ML model with keyword-based fallback
151
+ - Response latency typically ranges from 200-1000ms depending on query complexity
152
+
backend/docs/INTENT_CLASSIFICATION_IMPROVEMENTS.md ADDED
@@ -0,0 +1,87 @@
1
+ # Intent Classification Improvements
2
+
3
+ ## Overview
4
+
5
+ This document describes the improvements made to intent classification in Plan 5.
6
+
7
+ ## Problem Identified
8
+
9
+ Query "Cảnh báo lừa đảo giả danh công an" was being classified as `search_office` instead of `search_advisory`.
10
+
11
+ ### Root Cause
12
+
13
+ 1. **Keyword Conflict**: The keyword "công an" appears in both `search_office` and queries about `search_advisory`
14
+ 2. **Order of Checks**: The code checked `has_office_keywords` before `has_advisory_keywords`, causing office keywords to match first
15
+ 3. **Limited Training Data**: The `search_advisory` intent had only 7 examples, compared to more examples in other intents
16
+
17
+ ## Solutions Implemented
18
+
19
+ ### 1. Improved Keyword Matching Logic
20
+
21
+ **File**: `backend/hue_portal/chatbot/chatbot.py`
22
+
23
+ - Changed order: Check `has_advisory_keywords` **before** `has_office_keywords` (see the sketch below)
24
+ - Added more keywords for advisory: "mạo danh", "thủ đoạn", "cảnh giác"
25
+ - This ensures advisory queries are matched first when they contain both advisory and office keywords
26
+
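+ A simplified sketch of the reordered fallback; the keyword lists are abbreviated and the function name is illustrative rather than the exact code in `chatbot.py`:
+
+ ```python
+ ADVISORY_KEYWORDS = ["lừa đảo", "cảnh báo", "mạo danh", "thủ đoạn", "cảnh giác"]
+ OFFICE_KEYWORDS = ["công an", "địa chỉ", "trụ sở"]
+
+
+ def keyword_fallback_intent(query: str) -> str:
+     q = query.lower()
+     has_advisory = any(kw in q for kw in ADVISORY_KEYWORDS)
+     has_office = any(kw in q for kw in OFFICE_KEYWORDS)
+     # Advisory is checked first: "cảnh báo lừa đảo giả danh công an" contains both kinds of
+     # keywords and must resolve to search_advisory, not search_office.
+     if has_advisory:
+         return "search_advisory"
+     if has_office:
+         return "search_office"
+     return "general_query"
+ ```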
27
+ ### 2. Enhanced Training Data
28
+
29
+ **File**: `backend/hue_portal/chatbot/training/intent_dataset.json`
30
+
31
+ - Expanded `search_advisory` examples from 7 to 23 examples
32
+ - Added specific examples:
33
+ - "cảnh báo lừa đảo giả danh công an"
34
+ - "mạo danh cán bộ công an"
35
+ - "lừa đảo mạo danh"
36
+ - And 15 more variations
37
+
38
+ ### 3. Retrained Model
39
+
40
+ - Retrained intent classification model with improved training data
41
+ - Model accuracy improved
42
+ - Better handling of edge cases
43
+
44
+ ## Results
45
+
46
+ ### Before Improvements
47
+
48
+ - Query "Cảnh báo lừa đảo giả danh công an" → `search_office` (incorrect)
49
+ - Limited training examples for `search_advisory`
50
+
51
+ ### After Improvements
52
+
53
+ - Query "Cảnh báo lừa đảo giả danh công an" → `search_advisory` (correct)
54
+ - More balanced training data across all intents
55
+ - Better keyword matching logic
56
+
57
+ ## Testing
58
+
59
+ Test queries that now work correctly:
60
+
61
+ - "Cảnh báo lừa đảo giả danh công an" → `search_advisory`
62
+ - "Lừa đảo mạo danh cán bộ" → `search_advisory`
63
+ - "Mạo danh cán bộ công an" → `search_advisory`
64
+
65
+ ## 2025-11-14 Update — Serialization & API Regression
66
+
67
+ - Added `_serialize_document` in `backend/hue_portal/chatbot/chatbot.py` so RAG responses return JSON-safe payloads (no more `TypeError: Object of type type is not JSON serializable` when embeddings include model instances).
68
+ - Re-tested intents end-to-end via `scripts/test_api_endpoint.py` (6 queries spanning all intents):
69
+ - **Result:** 6/6 passed, 100 % intent accuracy.
70
+ - **Latency:** avg ~3.7 s (note: first call warms up `keepitreal/vietnamese-sbert-v2`, subsequent calls ≤1.8 s).
71
+ - Health checklist before testing:
72
+ 1. `POSTGRES_HOST=localhost POSTGRES_PORT=5433 ../../.venv/bin/python manage.py runserver 0.0.0.0:8090`
73
+ 2. `API_BASE_URL=http://localhost:8090 python scripts/test_api_endpoint.py`
74
+ 3. Watch server logs for any serialization warnings (none observed after fix).
75
+
76
+ ## Files Modified
77
+
78
+ 1. `backend/hue_portal/chatbot/training/intent_dataset.json` - Enhanced training data
79
+ 2. `backend/hue_portal/chatbot/chatbot.py` - Improved keyword matching logic
80
+ 3. `backend/hue_portal/chatbot/training/artifacts/intent_model.joblib` - Retrained model
81
+
82
+ ## Future Improvements
83
+
84
+ - Continue to add more training examples as edge cases are discovered
85
+ - Consider using more sophisticated ML models (e.g., transformer-based)
86
+ - Implement active learning to automatically improve from user feedback
87
+
backend/docs/LEGAL_REFRESH.md ADDED
@@ -0,0 +1,55 @@
1
+ # Legal Data Refresh Workflow
2
+
3
+ Use this sequence whenever new DOCX/PDF files are imported outside the user-facing UI (e.g. nightly ETL or bulk manifests).
4
+
5
+ ## Prerequisites
6
+
7
+ - Postgres + Redis running.
8
+ - Celery worker online (for interactive uploads) or `CELERY_TASK_ALWAYS_EAGER=true` for synchronous runs.
9
+ - Tesseract OCR installed (see `OCR_SETUP.md`).
10
+
11
+ ## Manual Command Sequence
12
+
13
+ ```
14
+ cd backend/hue_portal
15
+ source ../.venv/bin/activate
16
+
17
+ python manage.py load_legal_document --file "/path/to/docx" --code DOC-123
18
+ python ../scripts/generate_embeddings.py --model legal
19
+ python ../scripts/build_faiss_index.py --model legal
20
+ ```
21
+
22
+ Notes:
23
+
24
+ - `load_legal_document` can be substituted with the manifest loader (`scripts/load_legal_documents.py`) if multiple files need ingestion.
25
+ - The embedding script logs processed sections; expect a SHA checksum for each chunk.
26
+ - FAISS builder writes artifacts under `backend/hue_portal/artifacts/faiss_indexes`.
27
+
28
+ ## Automated Helper
29
+
30
+ `backend/scripts/refresh_legal_data.sh` wraps the three steps:
31
+
32
+ ```
33
+ ./backend/scripts/refresh_legal_data.sh \
34
+ --file "/path/to/THONG-TU.docx" \
35
+ --code TT-02
36
+ ```
37
+
38
+ Flags:
39
+
40
+ - `--skip-ingest` to only regenerate embeddings/index (useful after editing chunking logic).
41
+ - `--python` to point at a specific interpreter (default `python3`).
42
+
43
+ ## CI / Nightly Jobs
44
+
45
+ 1. Sync new files into `tài nguyên/`.
46
+ 2. Run the helper script for each file (or call the manifest loader first); see the sketch after this list.
47
+ 3. Archive FAISS artifacts (upload to object storage) so the chatbot containers can download them at boot.
48
+ 4. Record build duration and artifact checksums for auditing.
49
+
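+ A minimal sketch of how steps 2 and 4 could be scripted; the inbox path, document-code rule, and log format are assumptions, not existing project code:
+
+ ```python
+ #!/usr/bin/env python3
+ """Nightly legal-data refresh (sketch)."""
+ import hashlib
+ import subprocess
+ import time
+ from pathlib import Path
+
+ INBOX = Path("tài nguyên")  # directory where new DOCX/PDF files are synced
+ HELPER = "./backend/scripts/refresh_legal_data.sh"
+
+ for doc in sorted(INBOX.glob("*.docx")):
+     code = doc.stem.upper()[:20]  # naive document code, adjust to the real convention
+     started = time.time()
+     subprocess.run([HELPER, "--file", str(doc), "--code", code], check=True)
+     sha = hashlib.sha256(doc.read_bytes()).hexdigest()
+     print(f"{doc.name} code={code} sha256={sha} took={time.time() - started:.1f}s")
+ ```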
50
+ ## Verification Checklist
51
+
52
+ - `generate_embeddings` log ends with `Completed model=legal`.
53
+ - FAISS directory contains fresh timestamped `.faiss` + `.mappings.pkl`.
54
+ - Sample chatbot query (“Thông tư 02 ...”) returns snippets referencing the newly ingested document.
55
+
backend/docs/OCR_SETUP.md ADDED
@@ -0,0 +1,56 @@
1
+ # Tesseract OCR Runtime Setup
2
+
3
+ PyMuPDF + `pytesseract` require the native **tesseract-ocr** binary (with Vietnamese language data) to extract text from scanned PDFs. Install it on every environment that runs ingestion or Celery workers.
4
+
5
+ ## Docker / CI (Debian-based)
6
+
7
+ The backend Dockerfile already installs the required packages:
8
+
9
+ ```bash
10
+ apt-get update && apt-get install -y \
11
+ tesseract-ocr \
12
+ tesseract-ocr-eng \
13
+ tesseract-ocr-vie
14
+ ```
15
+
16
+ For GitHub Actions or other CI images, run the same command before executing tests that touch OCR.
17
+
18
+ ## macOS (Homebrew)
19
+
20
+ ```bash
21
+ brew install tesseract
22
+ brew install tesseract-lang # optional (contains vie)
23
+ ```
24
+
25
+ Verify:
26
+
27
+ ```bash
28
+ tesseract --version
29
+ ls /opt/homebrew/Cellar/tesseract/*/share/tessdata/vie.traineddata
30
+ ```
31
+
32
+ ## Ubuntu / Debian
33
+
34
+ ```bash
35
+ sudo apt update
36
+ sudo apt install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-vie
37
+ ```
38
+
39
+ ## Rocky / CentOS (DNF)
40
+
41
+ ```bash
42
+ sudo dnf install -y tesseract tesseract-langpack-eng tesseract-langpack-vie
43
+ ```
44
+
45
+ ## Configuration
46
+
47
+ - Set `OCR_LANGS` (default `vie+eng`) if additional language combinations are needed (see the sketch after this list).
48
+ - `OCR_PDF_ZOOM` (default `2.0`) controls rasterization DPI; increase for very small fonts.
49
+ - Check that `tesseract` is in `$PATH` for the user running Django/Celery.
50
+
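+ To see how these settings fit together, here is a minimal OCR snippet (illustrative only, not the project's ingestion code; it assumes PyMuPDF, pytesseract, and Pillow are installed):
+
+ ```python
+ import os
+
+ import fitz  # PyMuPDF
+ import pytesseract
+ from PIL import Image
+
+ langs = os.environ.get("OCR_LANGS", "vie+eng")
+ zoom = float(os.environ.get("OCR_PDF_ZOOM", "2.0"))
+
+ doc = fitz.open("scan.pdf")
+ page = doc[0]
+ pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))  # rasterize at a higher DPI
+ img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+ text = pytesseract.image_to_string(img, lang=langs)
+ print(text[:200])
+ ```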
51
+ ## Troubleshooting
52
+
53
+ 1. Run `tesseract --list-langs` to confirm Vietnamese appears.
54
+ 2. Ensure the worker container/user has read access to `/usr/share/tesseract-ocr/4.00/tessdata`.
55
+ 3. If OCR still fails, set `CELERY_TASK_ALWAYS_EAGER=true` locally to debug synchronously and inspect logs for `pytesseract` errors.
56
+
backend/golden_queries_example.json ADDED
@@ -0,0 +1,68 @@
1
+ [
2
+ {
3
+ "query": "Mức phạt vượt đèn đỏ là bao nhiêu?",
4
+ "intent": "search_fine",
5
+ "response_message": "Mức phạt vượt đèn đỏ theo Nghị định 100/2019/NĐ-CP là từ 200.000 - 400.000 VNĐ, tùy thuộc vào mức độ vi phạm.",
6
+ "response_data": {
7
+ "message": "Mức phạt vượt đèn đỏ theo Nghị định 100/2019/NĐ-CP là từ 200.000 - 400.000 VNĐ, tùy thuộc vào mức độ vi phạm.",
8
+ "intent": "search_fine",
9
+ "confidence": 0.95,
10
+ "results": [
11
+ {
12
+ "type": "fine",
13
+ "data": {
14
+ "id": 1,
15
+ "name": "Vượt đèn đỏ",
16
+ "code": "V001",
17
+ "min_fine": 200000,
18
+ "max_fine": 400000,
19
+ "article": "Điều 5",
20
+ "decree": "Nghị định 100/2019/NĐ-CP"
21
+ }
22
+ }
23
+ ],
24
+ "count": 1
25
+ },
26
+ "verified_by": "legal_expert",
27
+ "accuracy_score": 1.0
28
+ },
29
+ {
30
+ "query": "Thủ tục đăng ký tạm trú cần những gì?",
31
+ "intent": "search_procedure",
32
+ "response_message": "Thủ tục đăng ký tạm trú cần các giấy tờ sau: CMND/CCCD, giấy tờ chứng minh nơi ở, đơn đăng ký tạm trú. Nộp tại Công an phường/xã nơi tạm trú.",
33
+ "response_data": {
34
+ "message": "Thủ tục đăng ký tạm trú cần các giấy tờ sau: CMND/CCCD, giấy tờ chứng minh nơi ở, đơn đăng ký tạm trú. Nộp tại Công an phường/xã nơi tạm trú.",
35
+ "intent": "search_procedure",
36
+ "confidence": 0.95,
37
+ "results": [
38
+ {
39
+ "type": "procedure",
40
+ "data": {
41
+ "id": 1,
42
+ "title": "Đăng ký tạm trú",
43
+ "domain": "Cư trú",
44
+ "level": "Phường/Xã"
45
+ }
46
+ }
47
+ ],
48
+ "count": 1
49
+ },
50
+ "verified_by": "legal_expert",
51
+ "accuracy_score": 1.0
52
+ },
53
+ {
54
+ "query": "Địa chỉ công an phường ở đâu?",
55
+ "intent": "search_office",
56
+ "response_message": "Địa chỉ công an phường tùy thuộc vào phường bạn đang ở. Bạn có thể tra cứu tại trang web hoặc liên hệ số điện thoại 0234.xxx.xxx để được hướng dẫn.",
57
+ "response_data": {
58
+ "message": "Địa chỉ công an phường tùy thuộc vào phường bạn đang ở. Bạn có thể tra cứu tại trang web hoặc liên hệ số điện thoại 0234.xxx.xxx để được hướng dẫn.",
59
+ "intent": "search_office",
60
+ "confidence": 0.95,
61
+ "results": [],
62
+ "count": 0
63
+ },
64
+ "verified_by": "manual",
65
+ "accuracy_score": 1.0
66
+ }
67
+ ]
68
+
backend/hue_portal/Procfile ADDED
File without changes
backend/hue_portal/chatbot/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """
2
+ Chatbot app for handling conversational queries and natural language processing.
3
+ """
4
+
backend/hue_portal/chatbot/advanced_features.py ADDED
@@ -0,0 +1,185 @@
1
+ """
2
+ Advanced features for chatbot: follow-up suggestions, ambiguity detection, explanations.
3
+ """
4
+ from typing import List, Dict, Any, Optional, Tuple
5
+ from hue_portal.core.models import Fine, Procedure, Office, Advisory
6
+
7
+
8
+ def suggest_follow_up_questions(query: str, results: List[Any], intent: str) -> List[str]:
9
+ """
10
+ Suggest follow-up questions based on query and results.
11
+
12
+ Args:
13
+ query: Original query.
14
+ results: Retrieved results.
15
+ intent: Detected intent.
16
+
17
+ Returns:
18
+ List of suggested follow-up questions.
19
+ """
20
+ suggestions = []
21
+
22
+ if intent == "search_fine":
23
+ if results:
24
+ # Suggest questions about related fines
25
+ suggestions.append("Còn mức phạt nào khác không?")
26
+ suggestions.append("Điều luật liên quan là gì?")
27
+ suggestions.append("Biện pháp khắc phục như thế nào?")
28
+ else:
29
+ suggestions.append("Bạn có thể cho biết cụ thể loại vi phạm không?")
30
+
31
+ elif intent == "search_procedure":
32
+ if results:
33
+ suggestions.append("Hồ sơ cần chuẩn bị gì?")
34
+ suggestions.append("Lệ phí là bao nhiêu?")
35
+ suggestions.append("Thời hạn xử lý là bao lâu?")
36
+ suggestions.append("Nộp hồ sơ ở đâu?")
37
+ else:
38
+ suggestions.append("Bạn muốn tìm thủ tục nào cụ thể?")
39
+
40
+ elif intent == "search_office":
41
+ if results:
42
+ suggestions.append("Số điện thoại liên hệ?")
43
+ suggestions.append("Giờ làm việc như thế nào?")
44
+ suggestions.append("Địa chỉ cụ thể ở đâu?")
45
+ else:
46
+ suggestions.append("Bạn muốn tìm đơn vị nào?")
47
+
48
+ elif intent == "search_advisory":
49
+ if results:
50
+ suggestions.append("Còn cảnh báo nào khác không?")
51
+ suggestions.append("Cách phòng tránh như thế nào?")
52
+ else:
53
+ suggestions.append("Bạn muốn tìm cảnh báo về chủ đề gì?")
54
+
55
+ return suggestions[:3] # Return top 3 suggestions
56
+
57
+
58
+ def detect_ambiguity(query: str, results_count: int, confidence: float) -> Tuple[bool, Optional[str]]:
59
+ """
60
+ Detect if query is ambiguous.
61
+
62
+ Args:
63
+ query: User query.
64
+ results_count: Number of results found.
65
+ confidence: Confidence score.
66
+
67
+ Returns:
68
+ Tuple of (is_ambiguous, ambiguity_reason).
69
+ """
70
+ query_lower = query.lower()
71
+ query_words = query.split()
72
+
73
+ # Very short queries are often ambiguous
74
+ if len(query_words) <= 2:
75
+ return (True, "Câu hỏi quá ngắn, cần thêm thông tin")
76
+
77
+ # Low confidence and many results suggests ambiguity
78
+ if results_count > 10 and confidence < 0.5:
79
+ return (True, "Kết quả quá nhiều, cần cụ thể hơn")
80
+
81
+ # Very generic queries
82
+ generic_queries = ["thông tin", "tìm kiếm", "hỏi", "giúp"]
83
+ if any(gq in query_lower for gq in generic_queries) and len(query_words) <= 3:
84
+ return (True, "Câu hỏi chung chung, cần cụ thể hơn")
85
+
86
+ return (False, None)
87
+
88
+
89
+ def generate_explanation(result: Any, query: str, score: Optional[float] = None) -> str:
90
+ """
91
+ Generate explanation for why a result is relevant.
92
+
93
+ Args:
94
+ result: Result object.
95
96
+ query: Original query.
97
+ score: Relevance score.
98
+
99
+ Returns:
100
+ Explanation string.
101
+ """
102
+ result_type = type(result).__name__.lower()
103
+ explanation_parts = []
104
+
105
+ if "fine" in result_type:
106
+ name = getattr(result, "name", "")
107
+ code = getattr(result, "code", "")
108
+ explanation_parts.append(f"Kết quả này phù hợp vì:")
109
+ if code:
110
+ explanation_parts.append(f"- Mã vi phạm: {code}")
111
+ if name:
112
+ explanation_parts.append(f"- Tên vi phạm: {name}")
113
+ if score:
114
+ explanation_parts.append(f"- Độ phù hợp: {score:.0%}")
115
+
116
+ elif "procedure" in result_type:
117
+ title = getattr(result, "title", "")
118
+ explanation_parts.append(f"Kết quả này phù hợp vì:")
119
+ if title:
120
+ explanation_parts.append(f"- Tên thủ tục: {title}")
121
+ if score:
122
+ explanation_parts.append(f"- Độ phù hợp: {score:.0%}")
123
+
124
+ elif "office" in result_type:
125
+ unit_name = getattr(result, "unit_name", "")
126
+ explanation_parts.append(f"Kết quả này phù hợp vì:")
127
+ if unit_name:
128
+ explanation_parts.append(f"- Tên đơn vị: {unit_name}")
129
+ if score:
130
+ explanation_parts.append(f"- Độ phù hợp: {score:.0%}")
131
+
132
+ elif "advisory" in result_type:
133
+ title = getattr(result, "title", "")
134
+ explanation_parts.append(f"Kết quả này phù hợp vì:")
135
+ if title:
136
+ explanation_parts.append(f"- Tiêu đề: {title}")
137
+ if score:
138
+ explanation_parts.append(f"- Độ phù hợp: {score:.0%}")
139
+
140
+ return "\n".join(explanation_parts) if explanation_parts else "Kết quả này phù hợp với câu hỏi của bạn."
141
+
142
+
143
+ def compare_results(results: List[Any], result_type: str) -> str:
144
+ """
145
+ Compare multiple results and highlight differences.
146
+
147
+ Args:
148
+ results: List of result objects.
149
+ result_type: Type of results.
150
+
151
+ Returns:
152
+ Comparison summary string.
153
+ """
154
+ if len(results) < 2:
155
+ return ""
156
+
157
+ comparison_parts = ["So sánh các kết quả:"]
158
+
159
+ if result_type == "fine":
160
+ # Compare fine amounts
161
+ fine_amounts = []
162
+ for result in results[:3]:
163
+ if hasattr(result, "min_fine") and hasattr(result, "max_fine"):
164
+ if result.min_fine and result.max_fine:
165
+ fine_amounts.append(f"{result.name}: {result.min_fine:,.0f} - {result.max_fine:,.0f} VNĐ")
166
+
167
+ if fine_amounts:
168
+ comparison_parts.extend(fine_amounts)
169
+
170
+ elif result_type == "procedure":
171
+ # Compare procedures by domain/level
172
+ for result in results[:3]:
173
+ title = getattr(result, "title", "")
174
+ domain = getattr(result, "domain", "")
175
+ level = getattr(result, "level", "")
176
+ if title:
177
+ comp = f"- {title}"
178
+ if domain:
179
+ comp += f" ({domain})"
180
+ if level:
181
+ comp += f" - Cấp {level}"
182
+ comparison_parts.append(comp)
183
+
184
+ return "\n".join(comparison_parts)
185
+
backend/hue_portal/chatbot/analytics.py ADDED
@@ -0,0 +1,194 @@
1
+ """
2
+ Analytics and monitoring for Dual-Path RAG routing.
3
+ """
4
+ from datetime import datetime, timedelta
5
+ from typing import Dict, Any, List
6
+ from django.db.models import Count, Avg, Sum, Q, F
7
+ from django.utils import timezone
8
+
9
+ from hue_portal.core.models import QueryRoutingLog, GoldenQuery
10
+
11
+
12
+ def get_routing_stats(days: int = 7) -> Dict[str, Any]:
13
+ """
14
+ Get routing statistics for the last N days.
15
+
16
+ Args:
17
+ days: Number of days to analyze (default: 7).
18
+
19
+ Returns:
20
+ Dictionary with routing statistics.
21
+ """
22
+ cutoff_date = timezone.now() - timedelta(days=days)
23
+
24
+ logs = QueryRoutingLog.objects.filter(created_at__gte=cutoff_date)
25
+
26
+ total_count = logs.count()
27
+ if total_count == 0:
28
+ return {
29
+ 'total_queries': 0,
30
+ 'fast_path_count': 0,
31
+ 'slow_path_count': 0,
32
+ 'fast_path_percentage': 0.0,
33
+ 'slow_path_percentage': 0.0,
34
+ 'fast_path_avg_time_ms': 0.0,
35
+ 'slow_path_avg_time_ms': 0.0,
36
+ 'router_methods': {},
37
+ 'intent_breakdown': {},
38
+ 'cache_hit_rate': 0.0,
39
+ 'top_golden_queries': [],
40
+ }
41
+
42
+ # Path statistics
43
+ fast_path_count = logs.filter(route='fast_path').count()
44
+ slow_path_count = logs.filter(route='slow_path').count()
45
+
46
+ # Average response times
47
+ fast_path_avg = logs.filter(route='fast_path').aggregate(
48
+ avg_time=Avg('response_time_ms')
49
+ )['avg_time'] or 0.0
50
+
51
+ slow_path_avg = logs.filter(route='slow_path').aggregate(
52
+ avg_time=Avg('response_time_ms')
53
+ )['avg_time'] or 0.0
54
+
55
+ # Router methods breakdown
56
+ router_methods = dict(
57
+ logs.values('router_method')
58
+ .annotate(count=Count('id'))
59
+ .values_list('router_method', 'count')
60
+ )
61
+
62
+ # Intent breakdown
63
+ intent_breakdown = dict(
64
+ logs.values('intent')
65
+ .annotate(count=Count('id'))
66
+ .values_list('intent', 'count')
67
+ )
68
+
69
+ # Cache hit rate (Fast Path usage)
70
+ cache_hit_rate = (fast_path_count / total_count * 100) if total_count > 0 else 0.0
71
+
72
+ # Top golden queries by usage
73
+ top_golden_queries = list(
74
+ GoldenQuery.objects.filter(is_active=True)
75
+ .order_by('-usage_count')[:10]
76
+ .values('id', 'query', 'intent', 'usage_count', 'accuracy_score')
77
+ )
78
+
79
+ return {
80
+ 'total_queries': total_count,
81
+ 'fast_path_count': fast_path_count,
82
+ 'slow_path_count': slow_path_count,
83
+ 'fast_path_percentage': (fast_path_count / total_count * 100) if total_count > 0 else 0.0,
84
+ 'slow_path_percentage': (slow_path_count / total_count * 100) if total_count > 0 else 0.0,
85
+ 'fast_path_avg_time_ms': round(fast_path_avg, 2),
86
+ 'slow_path_avg_time_ms': round(slow_path_avg, 2),
87
+ 'router_methods': router_methods,
88
+ 'intent_breakdown': intent_breakdown,
89
+ 'cache_hit_rate': round(cache_hit_rate, 2),
90
+ 'top_golden_queries': top_golden_queries,
91
+ 'period_days': days,
92
+ }
93
+
94
+
95
+ def get_golden_dataset_stats() -> Dict[str, Any]:
96
+ """
97
+ Get statistics about the golden dataset.
98
+
99
+ Returns:
100
+ Dictionary with golden dataset statistics.
101
+ """
102
+ total_queries = GoldenQuery.objects.count()
103
+ active_queries = GoldenQuery.objects.filter(is_active=True).count()
104
+
105
+ # Intent breakdown
106
+ intent_breakdown = dict(
107
+ GoldenQuery.objects.filter(is_active=True)
108
+ .values('intent')
109
+ .annotate(count=Count('id'))
110
+ .values_list('intent', 'count')
111
+ )
112
+
113
+ # Total usage
114
+ total_usage = GoldenQuery.objects.aggregate(
115
+ total_usage=Sum('usage_count')
116
+ )['total_usage'] or 0
117
+
118
+ # Average accuracy
119
+ avg_accuracy = GoldenQuery.objects.filter(is_active=True).aggregate(
120
+ avg_accuracy=Avg('accuracy_score')
121
+ )['avg_accuracy'] or 1.0
122
+
123
+ # Queries with embeddings
124
+ with_embeddings = GoldenQuery.objects.filter(
125
+ is_active=True,
126
+ query_embedding__isnull=False
127
+ ).count()
128
+
129
+ return {
130
+ 'total_queries': total_queries,
131
+ 'active_queries': active_queries,
132
+ 'intent_breakdown': intent_breakdown,
133
+ 'total_usage': total_usage,
134
+ 'avg_accuracy': round(avg_accuracy, 3),
135
+ 'with_embeddings': with_embeddings,
136
+ 'embedding_coverage': (with_embeddings / active_queries * 100) if active_queries > 0 else 0.0,
137
+ }
138
+
139
+
140
+ def get_performance_metrics(days: int = 7) -> Dict[str, Any]:
141
+ """
142
+ Get performance metrics for both paths.
143
+
144
+ Args:
145
+ days: Number of days to analyze.
146
+
147
+ Returns:
148
+ Dictionary with performance metrics.
149
+ """
150
+ cutoff_date = timezone.now() - timedelta(days=days)
151
+ logs = QueryRoutingLog.objects.filter(created_at__gte=cutoff_date)
152
+
153
+ # P95, P99 response times
154
+ fast_path_times = list(
155
+ logs.filter(route='fast_path')
156
+ .values_list('response_time_ms', flat=True)
157
+ .order_by('response_time_ms')
158
+ )
159
+ slow_path_times = list(
160
+ logs.filter(route='slow_path')
161
+ .values_list('response_time_ms', flat=True)
162
+ .order_by('response_time_ms')
163
+ )
164
+
165
+ def percentile(data: List[float], p: float) -> float:
166
+ """Calculate percentile of sorted data."""
167
+ if not data:
168
+ return 0.0
169
+ if len(data) == 1:
170
+ return data[0]
171
+ k = (len(data) - 1) * p
172
+ f = int(k)
173
+ c = k - f
174
+ if f + 1 < len(data):
175
+ return float(data[f] + c * (data[f + 1] - data[f]))
176
+ return float(data[-1])
177
+
178
+ return {
179
+ 'fast_path': {
180
+ 'p50': percentile(fast_path_times, 0.5),
181
+ 'p95': percentile(fast_path_times, 0.95),
182
+ 'p99': percentile(fast_path_times, 0.99),
183
+ 'min': min(fast_path_times) if fast_path_times else 0.0,
184
+ 'max': max(fast_path_times) if fast_path_times else 0.0,
185
+ },
186
+ 'slow_path': {
187
+ 'p50': percentile(slow_path_times, 0.5),
188
+ 'p95': percentile(slow_path_times, 0.95),
189
+ 'p99': percentile(slow_path_times, 0.99),
190
+ 'min': min(slow_path_times) if slow_path_times else 0.0,
191
+ 'max': max(slow_path_times) if slow_path_times else 0.0,
192
+ },
193
+ }
194
+
backend/hue_portal/chatbot/apps.py ADDED
@@ -0,0 +1,7 @@
1
+ from django.apps import AppConfig
2
+
3
+
4
+ class ChatbotConfig(AppConfig):
5
+ default_auto_field = 'django.db.models.BigAutoField'
6
+ name = 'hue_portal.chatbot'
7
+
backend/hue_portal/chatbot/cache_monitor.py ADDED
@@ -0,0 +1,195 @@
1
+ """
2
+ Monitor Hugging Face model cache directory to track download progress.
3
+ This is a simpler approach that monitors the cache directory size.
4
+ """
5
+ import os
6
+ import time
7
+ import threading
8
+ from pathlib import Path
9
+ from typing import Dict, Optional
10
+ from dataclasses import dataclass, field
11
+
12
+
13
+ @dataclass
14
+ class CacheProgress:
15
+ """Track cache directory size progress."""
16
+ model_path: str
17
+ cache_path: Optional[str] = None
18
+ total_size_bytes: int = 0
19
+ current_size_bytes: int = 0
20
+ files_count: int = 0
21
+ files_completed: int = 0
22
+ last_updated: float = 0.0
23
+ is_monitoring: bool = False
24
+
25
+ @property
26
+ def percentage(self) -> float:
27
+ """Calculate progress percentage."""
28
+ if self.total_size_bytes == 0:
29
+ # Estimate based on typical model sizes
30
+ if "32B" in self.model_path or "32b" in self.model_path:
31
+ estimated_size = 70 * 1024 * 1024 * 1024 # ~70GB for 32B
32
+ elif "7B" in self.model_path or "7b" in self.model_path:
33
+ estimated_size = 15 * 1024 * 1024 * 1024 # ~15GB for 7B
34
+ else:
35
+ estimated_size = 5 * 1024 * 1024 * 1024 # ~5GB default
36
+ return min(100.0, (self.current_size_bytes / estimated_size) * 100.0)
37
+ return min(100.0, (self.current_size_bytes / self.total_size_bytes) * 100.0)
38
+
39
+ @property
40
+ def size_gb(self) -> float:
41
+ """Get current size in GB."""
42
+ return self.current_size_bytes / (1024 ** 3)
43
+
44
+ @property
45
+ def total_size_gb(self) -> float:
46
+ """Get total size in GB."""
47
+ if self.total_size_bytes == 0:
48
+ # Estimate
49
+ if "32B" in self.model_path or "32b" in self.model_path:
50
+ return 70.0
51
+ elif "7B" in self.model_path or "7b" in self.model_path:
52
+ return 15.0
53
+ else:
54
+ return 5.0
55
+ return self.total_size_bytes / (1024 ** 3)
56
+
57
+ def to_dict(self) -> Dict:
58
+ """Convert to dictionary."""
59
+ return {
60
+ "model_path": self.model_path,
61
+ "cache_path": self.cache_path,
62
+ "current_size_bytes": self.current_size_bytes,
63
+ "current_size_gb": round(self.size_gb, 2),
64
+ "total_size_bytes": self.total_size_bytes,
65
+ "total_size_gb": round(self.total_size_gb, 2),
66
+ "percentage": round(self.percentage, 2),
67
+ "files_count": self.files_count,
68
+ "files_completed": self.files_completed,
69
+ "is_monitoring": self.is_monitoring,
70
+ "last_updated": self.last_updated
71
+ }
72
+
73
+
74
+ class CacheMonitor:
75
+ """Monitor cache directory for download progress."""
76
+
77
+ def __init__(self):
78
+ self._progress: Dict[str, CacheProgress] = {}
79
+ self._lock = threading.Lock()
80
+ self._monitoring_threads: Dict[str, threading.Thread] = {}
81
+
82
+ def get_or_create(self, model_path: str) -> CacheProgress:
83
+ """Get or create progress tracker."""
84
+ with self._lock:
85
+ if model_path not in self._progress:
86
+ self._progress[model_path] = CacheProgress(model_path=model_path)
87
+ return self._progress[model_path]
88
+
89
+ def get(self, model_path: str) -> Optional[CacheProgress]:
90
+ """Get progress tracker."""
91
+ with self._lock:
92
+ return self._progress.get(model_path)
93
+
94
+ def _get_cache_path(self, model_path: str) -> Optional[Path]:
95
+ """Get cache path for model."""
96
+ try:
97
+ cache_dir = os.environ.get("HF_HOME") or os.path.expanduser("~/.cache/huggingface")
98
+ repo_id = model_path.replace("/", "--")
99
+ cache_path = Path(cache_dir) / "hub" / f"models--{repo_id}"
100
+ return cache_path if cache_path.exists() else None
101
+ except Exception:
102
+ return None
103
+
104
+ def _monitor_cache(self, model_path: str, interval: float = 2.0):
105
+ """Monitor cache directory size."""
106
+ progress = self.get_or_create(model_path)
107
+ progress.is_monitoring = True
108
+
109
+ cache_path = self._get_cache_path(model_path)
110
+ if cache_path:
111
+ progress.cache_path = str(cache_path)
112
+
113
+ while progress.is_monitoring:
114
+ try:
115
+ if cache_path and cache_path.exists():
116
+ # Calculate current size
117
+ total_size = 0
118
+ file_count = 0
119
+ for file_path in cache_path.rglob("*"):
120
+ if file_path.is_file():
121
+ file_count += 1
122
+ total_size += file_path.stat().st_size
123
+
124
+ progress.current_size_bytes = total_size
125
+ progress.files_count = file_count
126
+ progress.last_updated = time.time()
127
+
128
+ # Check for key files to determine completion
129
+ key_files = ["config.json", "tokenizer.json", "model.safetensors", "pytorch_model.bin"]
130
+ found_files = []
131
+ for key_file in key_files:
132
+ if list(cache_path.rglob(key_file)):
133
+ found_files.append(key_file)
134
+ progress.files_completed = len(found_files)
135
+
136
+ # Estimate total size if not set
137
+ if progress.total_size_bytes == 0 and progress.files_completed == len(key_files):
138
+ # All key files found, use current size as total
139
+ progress.total_size_bytes = total_size
140
+ else:
141
+ # Cache doesn't exist yet, check if it was created
142
+ cache_path = self._get_cache_path(model_path)
143
+ if cache_path:
144
+ progress.cache_path = str(cache_path)
145
+
146
+ time.sleep(interval)
147
+ except Exception as e:
148
+ logger.error(f"Error monitoring cache: {e}")
149
+ time.sleep(interval)
150
+
151
+ def start_monitoring(self, model_path: str, interval: float = 2.0):
152
+ """Start monitoring cache directory."""
153
+ with self._lock:
154
+ if model_path not in self._monitoring_threads:
155
+ thread = threading.Thread(
156
+ target=self._monitor_cache,
157
+ args=(model_path, interval),
158
+ daemon=True
159
+ )
160
+ thread.start()
161
+ self._monitoring_threads[model_path] = thread
162
+
163
+ def stop_monitoring(self, model_path: str):
164
+ """Stop monitoring cache directory."""
165
+ with self._lock:
166
+ progress = self._progress.get(model_path)
167
+ if progress:
168
+ progress.is_monitoring = False
169
+ if model_path in self._monitoring_threads:
170
+ del self._monitoring_threads[model_path]
171
+
172
+ def get_progress(self, model_path: str) -> Optional[Dict]:
173
+ """Get progress as dictionary."""
174
+ progress = self.get(model_path)
175
+ if progress:
176
+ return progress.to_dict()
177
+ return None
178
+
179
+
180
+ # Global monitor instance
181
+ _global_monitor = CacheMonitor()
182
+
183
+
184
+ def get_cache_monitor() -> CacheMonitor:
185
+ """Get global cache monitor instance."""
186
+ return _global_monitor
187
+
188
+
189
+ # Module-level logger: defined after the class, but bound at import time, before any monitoring thread runs
190
+ import logging
191
+ logger = logging.getLogger(__name__)
192
+
193
+
194
+
195
+
backend/hue_portal/chatbot/chatbot.py ADDED
@@ -0,0 +1,1092 @@
1
+ """
2
+ Chatbot wrapper that integrates core chatbot with router, LLM, and context management.
3
+ """
4
+ import os
5
+ import copy
6
+ import logging
7
+ import json
8
+ import time
9
+ import unicodedata
10
+ import re
11
+ from typing import Dict, Any, Optional
12
+ from hue_portal.core.chatbot import Chatbot as CoreChatbot, get_chatbot as get_core_chatbot
13
+ from hue_portal.chatbot.router import decide_route, IntentRoute, RouteDecision, DOCUMENT_CODE_PATTERNS
14
+ from hue_portal.chatbot.context_manager import ConversationContext
15
+ from hue_portal.chatbot.llm_integration import LLMGenerator
16
+ from hue_portal.core.models import LegalSection, LegalDocument
17
+ from hue_portal.chatbot.exact_match_cache import ExactMatchCache
18
+ from hue_portal.chatbot.slow_path_handler import SlowPathHandler
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ EXACT_MATCH_CACHE = ExactMatchCache(
23
+ max_size=int(os.environ.get("EXACT_MATCH_CACHE_MAX", "256")),
24
+ ttl_seconds=int(os.environ.get("EXACT_MATCH_CACHE_TTL_SECONDS", "43200")),
25
+ )
26
+
27
+ DEBUG_LOG_PATH = "/Users/davidtran/Downloads/TryHarDemNayProject/.cursor/debug.log"
28
+ DEBUG_SESSION_ID = "debug-session"
29
+ DEBUG_RUN_ID = "pre-fix"
30
+
31
+ #region agent log
32
+ def _agent_debug_log(hypothesis_id: str, location: str, message: str, data: Dict[str, Any]):
33
+ try:
34
+ payload = {
35
+ "sessionId": DEBUG_SESSION_ID,
36
+ "runId": DEBUG_RUN_ID,
37
+ "hypothesisId": hypothesis_id,
38
+ "location": location,
39
+ "message": message,
40
+ "data": data,
41
+ "timestamp": int(time.time() * 1000),
42
+ }
43
+ with open(DEBUG_LOG_PATH, "a", encoding="utf-8") as log_file:
44
+ log_file.write(json.dumps(payload, ensure_ascii=False) + "\n")
45
+ except Exception:
46
+ pass
47
+ #endregion
48
+
49
+
50
+ class Chatbot(CoreChatbot):
51
+ """
52
+ Enhanced chatbot with session support, routing, and RAG capabilities.
53
+ """
54
+
55
+ def __init__(self):
56
+ super().__init__()
57
+ self.llm_generator = None
58
+ # In-memory cache: keep the most recent legal answer per session so follow-up questions are handled quickly
59
+ self._last_legal_answer_by_session: Dict[str, str] = {}
60
+ self._initialize_llm()
61
+
62
+ def _initialize_llm(self):
63
+ """Initialize LLM generator if needed."""
64
+ try:
65
+ self.llm_generator = LLMGenerator()
66
+ except Exception as e:
67
+ print(f"⚠️ LLM generator not available: {e}")
68
+ self.llm_generator = None
69
+
70
+ def generate_response(self, query: str, session_id: Optional[str] = None) -> Dict[str, Any]:
71
+ """
72
+ Generate chatbot response with session support and routing.
73
+
74
+ Args:
75
+ query: User query string
76
+ session_id: Optional session ID for conversation context
77
+
78
+ Returns:
79
+ Response dictionary with message, intent, results, etc.
80
+ """
81
+ query = query.strip()
82
+
83
+ # Save user message to context
84
+ if session_id:
85
+ try:
86
+ ConversationContext.add_message(
87
+ session_id=session_id,
88
+ role="user",
89
+ content=query
90
+ )
91
+ except Exception as e:
92
+ print(f"⚠️ Failed to save user message: {e}")
93
+
94
+ session_metadata: Dict[str, Any] = {}
95
+ selected_doc_code: Optional[str] = None
96
+ if session_id:
97
+ try:
98
+ session_metadata = ConversationContext.get_session_metadata(session_id)
99
+ selected_doc_code = session_metadata.get("selected_document_code")
100
+ except Exception:
101
+ session_metadata = {}
102
+
103
+ # Classify intent
104
+ intent, confidence = self.classify_intent(query)
105
+
106
+ # Router decision (using raw intent)
107
+ route_decision = decide_route(query, intent, confidence)
108
+
109
+ # Use forced intent if router suggests it
110
+ if route_decision.forced_intent:
111
+ intent = route_decision.forced_intent
112
+
113
+ # If the session already has a selected_document_code (the user picked a document in the wizard),
114
+ # always force the intent to search_legal and route to SEARCH,
115
+ # so we do not get stuck in the small-talk/off-topic branch because of the original question's wording.
116
+ if selected_doc_code:
117
+ intent = "search_legal"
118
+ route_decision.route = IntentRoute.SEARCH
119
+ route_decision.forced_intent = "search_legal"
120
+
121
+ # Map every content-lookup intent to search_legal
122
+ domain_search_intents = {
123
+ "search_fine",
124
+ "search_procedure",
125
+ "search_office",
126
+ "search_advisory",
127
+ "general_query",
128
+ }
129
+ if intent in domain_search_intents:
130
+ intent = "search_legal"
131
+ route_decision.route = IntentRoute.SEARCH
132
+ route_decision.forced_intent = "search_legal"
133
+
134
+ # Instant exact-match cache lookup
135
+ # ⚠️ Disable the cache for the search_legal intent so it always goes through the wizard / Slow Path,
136
+ # avoiding stale cached answers that have no options.
137
+ cached_response = None
138
+ if intent != "search_legal":
139
+ cached_response = EXACT_MATCH_CACHE.get(query, intent)
140
+ if cached_response:
141
+ cached_response["_cache"] = "exact_match"
142
+ cached_response["_source"] = cached_response.get("_source", "cache")
143
+ cached_response.setdefault("routing", route_decision.route.value)
144
+ logger.info(
145
+ "[CACHE] Hit for intent=%s route=%s source=%s",
146
+ intent,
147
+ route_decision.route.value,
148
+ cached_response["_source"],
149
+ )
150
+ if session_id:
151
+ cached_response["session_id"] = session_id
152
+ if session_id:
153
+ try:
154
+ ConversationContext.add_message(
155
+ session_id=session_id,
156
+ role="bot",
157
+ content=cached_response.get("message", ""),
158
+ intent=intent,
159
+ )
160
+ except Exception as e:
161
+ print(f"⚠️ Failed to save cached bot message: {e}")
162
+ return cached_response
163
+
164
+ # Wizard / option-first handling right at the chatbot layer:
165
+ # Multi-stage wizard flow:
166
+ # Stage 1: Choose document (if no document selected)
167
+ # Stage 2: Choose topic/section (if document selected but no topic)
168
+ # Stage 3: Choose detail (if topic selected, ask for more details)
169
+ # Final: Answer (when user says "Không" or after detail selection)
170
+ disable_wizard_flow = os.environ.get("DISABLE_WIZARD_FLOW", "false").lower() == "true"
171
+ print(f"[WIZARD] DISABLE_WIZARD_FLOW={os.environ.get('DISABLE_WIZARD_FLOW', 'false')} -> disable_wizard_flow={disable_wizard_flow}")
172
+
173
+ has_doc_code_in_query = self._query_has_document_code(query)
174
+ wizard_stage = session_metadata.get("wizard_stage") if session_metadata else None
175
+ selected_topic = session_metadata.get("selected_topic") if session_metadata else None
176
+ wizard_depth = session_metadata.get("wizard_depth", 0) if session_metadata else 0
177
+
178
+ print(f"[WIZARD] Chatbot layer check - intent={intent}, wizard_stage={wizard_stage}, selected_doc_code={selected_doc_code}, selected_topic={selected_topic}, has_doc_code_in_query={has_doc_code_in_query}, query='{query[:50]}'")
179
+
180
+ # CRITICAL: If wizard flow is disabled, reset all wizard state immediately
181
+ if disable_wizard_flow:
182
+ print("[WIZARD] 🚫 Wizard flow DISABLED - resetting all wizard state and skipping wizard stages")
183
+ selected_doc_code = None
184
+ selected_topic = None
185
+ wizard_stage = None
186
+ wizard_depth = 0
187
+ # Update session metadata to clear wizard state
188
+ if session_id:
189
+ try:
190
+ ConversationContext.update_session_metadata(
191
+ session_id,
192
+ {
193
+ "selected_document_code": None,
194
+ "selected_topic": None,
195
+ "wizard_stage": None,
196
+ "wizard_depth": 0,
197
+ }
198
+ )
199
+ print("[WIZARD] ✅ Wizard state cleared from session metadata")
200
+ except Exception as e:
201
+ print(f"⚠️ Failed to clear wizard state: {e}")
202
+ # Also update session_metadata dict for current function scope
203
+ if session_metadata:
204
+ session_metadata["selected_document_code"] = None
205
+ session_metadata["selected_topic"] = None
206
+ session_metadata["wizard_stage"] = None
207
+ session_metadata["wizard_depth"] = 0
208
+
209
+ # Reset wizard state if new query doesn't have document code and wizard_stage is "answer"
210
+ # This handles the case where user asks a new question after completing a previous wizard flow
211
+ # CRITICAL: Check conditions and reset BEFORE Stage 1 check
212
+ should_reset = (
213
+ not disable_wizard_flow
214
+ and intent == "search_legal"
215
+ and not has_doc_code_in_query
216
+ and wizard_stage == "answer"
217
+ )
218
+ print(f"[WIZARD] Reset check - intent={intent}, has_doc_code={has_doc_code_in_query}, wizard_stage={wizard_stage}, should_reset={should_reset}") # v2.0-fix
219
+
220
+ if should_reset:
221
+ print("[WIZARD] 🔄 New query detected, resetting wizard state for fresh start")
222
+ selected_doc_code = None
223
+ selected_topic = None
224
+ wizard_stage = None
225
+ # Update session metadata FIRST before continuing
226
+ if session_id:
227
+ try:
228
+ ConversationContext.update_session_metadata(
229
+ session_id,
230
+ {
231
+ "selected_document_code": None,
232
+ "selected_topic": None,
233
+ "wizard_stage": None,
234
+ "wizard_depth": 0,
235
+ }
236
+ )
237
+ print("[WIZARD] ✅ Wizard state reset in session metadata")
238
+ except Exception as e:
239
+ print(f"⚠️ Failed to reset wizard state: {e}")
240
+ # Also update session_metadata dict for current function scope
241
+ if session_metadata:
242
+ session_metadata["selected_document_code"] = None
243
+ session_metadata["selected_topic"] = None
244
+ session_metadata["wizard_stage"] = None
245
+ session_metadata["wizard_depth"] = 0
246
+
247
+ # Stage 1: Choose document (if no document selected and no code in query)
248
+ # Use Query Rewrite Strategy from slow_path_handler instead of old LLM suggestions
249
+ if (
250
+ intent == "search_legal"
251
+ and not selected_doc_code
252
+ and not has_doc_code_in_query
253
+ and not disable_wizard_flow
254
+ ):
255
+ print("[WIZARD] ✅ Stage 1: Using Query Rewrite Strategy from slow_path_handler")
256
+ # Delegate to slow_path_handler which has Query Rewrite Strategy
257
+ slow_handler = SlowPathHandler()
258
+ response = slow_handler.handle(
259
+ query=query,
260
+ intent=intent,
261
+ session_id=session_id,
262
+ selected_document_code=None, # No document selected yet
263
+ )
264
+
265
+ # Ensure response has wizard metadata
266
+ if response:
267
+ response.setdefault("wizard_stage", "choose_document")
268
+ response.setdefault("routing", "legal_wizard")
269
+ response.setdefault("type", "options")
270
+
271
+ # Update session metadata
272
+ if session_id:
273
+ try:
274
+ ConversationContext.update_session_metadata(
275
+ session_id,
276
+ {
277
+ "wizard_stage": "choose_document",
278
+ "wizard_depth": 1,
279
+ }
280
+ )
281
+ except Exception as e:
282
+ logger.warning("[WIZARD] Failed to update session metadata: %s", e)
283
+
284
+ # Save bot message to context
285
+ if session_id:
286
+ try:
287
+ bot_message = response.get("message") or response.get("clarification", {}).get("message", "")
288
+ ConversationContext.add_message(
289
+ session_id=session_id,
290
+ role="bot",
291
+ content=bot_message,
292
+ intent=intent,
293
+ )
294
+ except Exception as e:
295
+ print(f"⚠️ Failed to save wizard bot message: {e}")
296
+
297
+ return response if response else {
298
+ "message": "Xin lỗi, có lỗi xảy ra khi tìm kiếm văn bản.",
299
+ "intent": intent,
300
+ "results": [],
301
+ "count": 0,
302
+ }
303
+
304
+ # Stage 2: Choose topic/section (if document selected but no topic yet)
305
+ # Skip if wizard_stage is already "answer" (user wants final answer)
306
+ if (
307
+ intent == "search_legal"
308
+ and selected_doc_code
309
+ and not selected_topic
310
+ and not has_doc_code_in_query
311
+ and wizard_stage != "answer"
312
+ and not disable_wizard_flow
313
+ ):
314
+ print("[WIZARD] ✅ Stage 2 triggered: Choose topic/section")
315
+
316
+ # Get document title
317
+ document_title = selected_doc_code
318
+ try:
319
+ doc = LegalDocument.objects.filter(code=selected_doc_code).first()
320
+ if doc:
321
+ document_title = getattr(doc, "title", "") or selected_doc_code
322
+ except Exception:
323
+ pass
324
+
325
+ # Extract keywords from query for parallel search
326
+ search_keywords_from_query = []
327
+ if self.llm_generator:
328
+ try:
329
+ conversation_context = None
330
+ if session_id:
331
+ try:
332
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
333
+ conversation_context = [
334
+ {"role": msg.role, "content": msg.content}
335
+ for msg in recent_messages
336
+ ]
337
+ except Exception:
338
+ pass
339
+
340
+ search_keywords_from_query = self.llm_generator.extract_search_keywords(
341
+ query=query,
342
+ selected_options=None, # No options selected yet
343
+ conversation_context=conversation_context,
344
+ )
345
+ print(f"[WIZARD] Extracted keywords: {search_keywords_from_query[:5]}")
346
+ except Exception as exc:
347
+ logger.warning("[WIZARD] Keyword extraction failed: %s", exc)
348
+
349
+ # Fallback to simple keyword extraction
350
+ if not search_keywords_from_query:
351
+ search_keywords_from_query = self.chatbot.extract_keywords(query)
352
+
353
+ # Trigger parallel search for document (if not already done)
354
+ slow_handler = SlowPathHandler()
355
+ prefetched_results = slow_handler._get_prefetched_results(session_id, "document_results")
356
+
357
+ if not prefetched_results:
358
+ # Trigger parallel search now
359
+ slow_handler._parallel_search_prepare(
360
+ document_code=selected_doc_code,
361
+ keywords=search_keywords_from_query,
362
+ session_id=session_id,
363
+ )
364
+ logger.info("[WIZARD] Triggered parallel search for document")
365
+
366
+ # Get prefetched search results from parallel search (if available)
367
+ prefetched_results = slow_handler._get_prefetched_results(session_id, "document_results")
368
+ search_results = []
369
+
370
+ if prefetched_results:
371
+ search_results = prefetched_results.get("results", [])
372
+ logger.info("[WIZARD] Using prefetched results: %d sections", len(search_results))
373
+ else:
374
+ # Fallback: search synchronously if prefetch not ready
375
+ search_result = slow_handler._search_by_intent(
376
+ intent="search_legal",
377
+ query=query,
378
+ limit=20,
379
+ preferred_document_code=selected_doc_code.upper(),
380
+ )
381
+ search_results = search_result.get("results", [])
382
+ logger.info("[WIZARD] Fallback search: %d sections", len(search_results))
383
+
384
+ # Extract keywords for topic options
385
+ conversation_context = None
386
+ if session_id:
387
+ try:
388
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
389
+ conversation_context = [
390
+ {"role": msg.role, "content": msg.content}
391
+ for msg in recent_messages
392
+ ]
393
+ except Exception:
394
+ pass
395
+
396
+ # Use LLM to generate topic options
397
+ topic_options = []
398
+ intro_message = f"Bạn muốn tìm điều khoản/chủ đề nào cụ thể trong {document_title}?"
399
+ search_keywords = []
400
+
401
+ if self.llm_generator:
402
+ try:
403
+ llm_payload = self.llm_generator.suggest_topic_options(
404
+ query=query,
405
+ document_code=selected_doc_code,
406
+ document_title=document_title,
407
+ search_results=search_results[:10], # Top 10 for options
408
+ conversation_context=conversation_context,
409
+ max_options=3,
410
+ )
411
+ if llm_payload:
412
+ intro_message = llm_payload.get("message") or intro_message
413
+ topic_options = llm_payload.get("options", [])
414
+ search_keywords = llm_payload.get("search_keywords", [])
415
+ print(f"[WIZARD] ✅ LLM generated {len(topic_options)} topic options")
416
+ except Exception as exc:
417
+ logger.warning("[WIZARD] LLM topic suggestion failed: %s", exc)
418
+
419
+ # Fallback: build options from search results
420
+ if not topic_options and search_results:
421
+ for result in search_results[:3]:
422
+ data = result.get("data", {})
423
+ section_title = data.get("section_title") or data.get("title") or ""
424
+ article = data.get("article") or data.get("article_number") or ""
425
+ if section_title or article:
426
+ topic_options.append({
427
+ "title": section_title or article,
428
+ "article": article,
429
+ "reason": data.get("excerpt", "")[:100] or "",
430
+ "keywords": [],
431
+ })
432
+
433
+ # If still no options, create generic ones
434
+ if not topic_options:
435
+ topic_options = [
436
+ {
437
+ "title": "Các điều khoản liên quan",
438
+ "article": "",
439
+ "reason": "Tìm kiếm các điều khoản liên quan đến câu hỏi của bạn",
440
+ "keywords": [],
441
+ }
442
+ ]
443
+
444
+ # Trigger parallel search for selected keywords
445
+ if search_keywords:
446
+ slow_handler._parallel_search_topic(
447
+ document_code=selected_doc_code,
448
+ topic_keywords=search_keywords,
449
+ session_id=session_id,
450
+ )
451
+
452
+ response = {
453
+ "message": intro_message,
454
+ "intent": intent,
455
+ "confidence": confidence,
456
+ "results": [],
457
+ "count": 0,
458
+ "routing": "legal_wizard",
459
+ "type": "options",
460
+ "wizard_stage": "choose_topic",
461
+ "clarification": {
462
+ "message": intro_message,
463
+ "options": topic_options,
464
+ },
465
+ "options": topic_options,
466
+ }
467
+ if session_id:
468
+ response["session_id"] = session_id
469
+ try:
470
+ ConversationContext.add_message(
471
+ session_id=session_id,
472
+ role="bot",
473
+ content=intro_message,
474
+ intent=intent,
475
+ )
476
+ ConversationContext.update_session_metadata(
477
+ session_id,
478
+ {
479
+ "wizard_stage": "choose_topic",
480
+ },
481
+ )
482
+ except Exception as e:
483
+ print(f"⚠️ Failed to save Stage 2 bot message: {e}")
484
+ return response
485
+
486
+ # Stage 3: Choose detail (if topic selected, ask if user wants more details)
487
+ # Skip if wizard_stage is already "answer" (user wants final answer)
488
+ if intent == "search_legal" and selected_doc_code and selected_topic and wizard_stage != "answer":
489
+ # Check if user is asking for more details or saying "Không"
490
+ query_lower = query.lower()
491
+ wants_more = any(kw in query_lower for kw in ["có", "cần", "muốn", "thêm", "chi tiết", "nữa"])
492
+ says_no = any(kw in query_lower for kw in ["không", "khong", "thôi", "đủ", "xong"])
493
+
494
+ if says_no or wizard_depth >= 2:
495
+ # User doesn't want more details or already asked twice - proceed to final answer
496
+ print("[WIZARD] ✅ User wants final answer, proceeding to slow_path")
497
+ # Clear wizard stage to allow normal answer flow
498
+ if session_id:
499
+ try:
500
+ ConversationContext.update_session_metadata(
501
+ session_id,
502
+ {
503
+ "wizard_stage": "answer",
504
+ },
505
+ )
506
+ except Exception:
507
+ pass
508
+ elif wants_more or wizard_depth == 0:
509
+ # User wants more details - generate detail options
510
+ print("[WIZARD] ✅ Stage 3 triggered: Choose detail")
511
+
512
+ # Get conversation context
513
+ conversation_context = None
514
+ if session_id:
515
+ try:
516
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
517
+ conversation_context = [
518
+ {"role": msg.role, "content": msg.content}
519
+ for msg in recent_messages
520
+ ]
521
+ except Exception:
522
+ pass
523
+
524
+ # Use LLM to generate detail options
525
+ detail_options = []
526
+ intro_message = "Bạn muốn chi tiết gì cho chủ đề này nữa không?"
527
+ search_keywords = []
528
+
529
+ if self.llm_generator:
530
+ try:
531
+ llm_payload = self.llm_generator.suggest_detail_options(
532
+ query=query,
533
+ selected_document_code=selected_doc_code,
534
+ selected_topic=selected_topic,
535
+ conversation_context=conversation_context,
536
+ max_options=3,
537
+ )
538
+ if llm_payload:
539
+ intro_message = llm_payload.get("message") or intro_message
540
+ detail_options = llm_payload.get("options", [])
541
+ search_keywords = llm_payload.get("search_keywords", [])
542
+ print(f"[WIZARD] ✅ LLM generated {len(detail_options)} detail options")
543
+ except Exception as exc:
544
+ logger.warning("[WIZARD] LLM detail suggestion failed: %s", exc)
545
+
546
+ # Fallback options
547
+ if not detail_options:
548
+ detail_options = [
549
+ {
550
+ "title": "Thẩm quyền xử lý",
551
+ "reason": "Tìm hiểu về thẩm quyền xử lý kỷ luật",
552
+ "keywords": ["thẩm quyền", "xử lý"],
553
+ },
554
+ {
555
+ "title": "Trình tự, thủ tục",
556
+ "reason": "Tìm hiểu về trình tự, thủ tục xử lý",
557
+ "keywords": ["trình tự", "thủ tục"],
558
+ },
559
+ {
560
+ "title": "Hình thức kỷ luật",
561
+ "reason": "Tìm hiểu về các hình thức kỷ luật",
562
+ "keywords": ["hình thức", "kỷ luật"],
563
+ },
564
+ ]
565
+
566
+ # Trigger parallel search for detail keywords
567
+ if search_keywords and session_id:
568
+ slow_handler = SlowPathHandler()
569
+ slow_handler._parallel_search_topic(
570
+ document_code=selected_doc_code,
571
+ topic_keywords=search_keywords,
572
+ session_id=session_id,
573
+ )
574
+
575
+ response = {
576
+ "message": intro_message,
577
+ "intent": intent,
578
+ "confidence": confidence,
579
+ "results": [],
580
+ "count": 0,
581
+ "routing": "legal_wizard",
582
+ "type": "options",
583
+ "wizard_stage": "choose_detail",
584
+ "clarification": {
585
+ "message": intro_message,
586
+ "options": detail_options,
587
+ },
588
+ "options": detail_options,
589
+ }
590
+ if session_id:
591
+ response["session_id"] = session_id
592
+ try:
593
+ ConversationContext.add_message(
594
+ session_id=session_id,
595
+ role="bot",
596
+ content=intro_message,
597
+ intent=intent,
598
+ )
599
+ ConversationContext.update_session_metadata(
600
+ session_id,
601
+ {
602
+ "wizard_stage": "choose_detail",
603
+ "wizard_depth": wizard_depth + 1,
604
+ },
605
+ )
606
+ except Exception as e:
607
+ print(f"⚠️ Failed to save Stage 3 bot message: {e}")
608
+ return response
609
+
610
+ # Always send legal intent through Slow Path RAG
611
+ if intent == "search_legal":
612
+ response = self._run_slow_path_legal(
613
+ query,
614
+ intent,
615
+ session_id,
616
+ route_decision,
617
+ session_metadata=session_metadata,
618
+ )
619
+ elif route_decision.route == IntentRoute.GREETING:
620
+ response = {
621
+ "message": "Xin chào! Tôi có thể giúp bạn tra cứu các thông tin liên quan về các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên",
622
+ "intent": "greeting",
623
+ "confidence": 0.9,
624
+ "results": [],
625
+ "count": 0,
626
+ "routing": "greeting"
627
+ }
628
+
629
+ elif route_decision.route == IntentRoute.SMALL_TALK:
630
+ # Handle follow-up questions within the conversation context
631
+ follow_up_keywords = [
632
+ "có điều khoản",
633
+ "liên quan",
634
+ "khác",
635
+ "nữa",
636
+ "thêm",
637
+ "tóm tắt",
638
+ "tải file",
639
+ "tải",
640
+ "download",
641
+ ]
642
+ query_lower = query.lower()
643
+ is_follow_up = any(kw in query_lower for kw in follow_up_keywords)
644
+ #region agent log
645
+ _agent_debug_log(
646
+ hypothesis_id="H2",
647
+ location="chatbot.py:119",
648
+ message="follow_up_detection",
649
+ data={
650
+ "query": query,
651
+ "is_follow_up": is_follow_up,
652
+ "session_id_present": bool(session_id),
653
+ },
654
+ )
655
+ #endregion
656
+
657
+ response = None
658
+
659
+ # For follow-up questions, prefer the most recent legal context stored in the session
660
+ if is_follow_up and session_id:
661
+ previous_answer = self._last_legal_answer_by_session.get(session_id, "")
662
+
663
+ # If not in the in-memory cache, fall back to the ConversationContext DB
664
+ if not previous_answer:
665
+ try:
666
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
667
+ for msg in reversed(recent_messages):
668
+ if msg.role == "bot" and msg.intent == "search_legal":
669
+ previous_answer = msg.content or ""
670
+ break
671
+ except Exception as e:
672
+ logger.warning("[FOLLOW_UP] Failed to load context from DB: %s", e)
673
+
674
+ if previous_answer:
675
+ if "tóm tắt" in query_lower:
676
+ summary_message = None
677
+ if getattr(self, "llm_generator", None):
678
+ try:
679
+ prompt = (
680
+ "Bạn là chuyên gia pháp luật. Hãy tóm tắt ngắn gọn, rõ ràng nội dung chính của đoạn sau "
681
+ "(giữ nguyên tinh thần và các mức, tỷ lệ, hình thức kỷ luật nếu có):\n\n"
682
+ f"{previous_answer}"
683
+ )
684
+ summary_message = self.llm_generator.generate_answer(
685
+ prompt,
686
+ context=None,
687
+ documents=None,
688
+ )
689
+ except Exception as e:
690
+ logger.warning("[FOLLOW_UP] LLM summary failed: %s", e)
691
+
692
+ if summary_message:
693
+ message = summary_message
694
+ else:
695
+ content_preview = (
696
+ previous_answer[:400] + "..." if len(previous_answer) > 400 else previous_answer
697
+ )
698
+ message = "Tóm tắt nội dung chính của điều khoản trước đó:\n\n" f"{content_preview}"
699
+ elif "tải" in query_lower:
700
+ message = (
701
+ "Bạn có thể tải file gốc của văn bản tại mục Quản lý văn bản trên hệ thống "
702
+ "hoặc liên hệ cán bộ phụ trách để được cung cấp bản đầy đủ."
703
+ )
704
+ else:
705
+ message = (
706
+ "Trong câu trả lời trước, tôi đã trích dẫn điều khoản chính liên quan. "
707
+ "Nếu bạn cần điều khoản khác (ví dụ về thẩm quyền, trình tự, hồ sơ), "
708
+ "hãy nêu rõ nội dung muốn tìm để tôi trợ giúp nhanh nhất."
709
+ )
710
+
711
+ response = {
712
+ "message": message,
713
+ "intent": "search_legal",
714
+ "confidence": 0.85,
715
+ "results": [],
716
+ "count": 0,
717
+ "routing": "follow_up",
718
+ }
719
+
720
+ # If this is not a follow-up or no context was found, return a friendly message
721
+ if response is None:
722
+ #region agent log
723
+ _agent_debug_log(
724
+ hypothesis_id="H1",
725
+ location="chatbot.py:193",
726
+ message="follow_up_fallback",
727
+ data={
728
+ "is_follow_up": is_follow_up,
729
+ "session_id_present": bool(session_id),
730
+ },
731
+ )
732
+ #endregion
733
+ # Detect off-topic questions (cooking, recipes, etc.)
734
+ off_topic_keywords = ["nấu", "nau", "chả trứng", "cha trung", "món ăn", "mon an", "công thức", "cong thuc",
735
+ "cách làm", "cach lam", "đổ chả", "do cha", "trứng", "trung"]
736
+ is_off_topic = any(kw in query_lower for kw in off_topic_keywords)
737
+
738
+ if is_off_topic:
739
+ # Out of scope → decline politely and suggest the wizard with the main legal documents
740
+ intro_message = (
741
+ "Xin lỗi, tôi là chatbot chuyên về tra cứu các văn bản quy định pháp luật "
742
+ "về xử lí kỷ luật cán bộ đảng viên của Phòng Thanh Tra - Công An Thành Phố Huế.\n\n"
743
+ "Tôi không thể trả lời các câu hỏi về nấu ăn, công thức nấu ăn hay các chủ đề khác ngoài phạm vi pháp luật.\n\n"
744
+ "Tuy nhiên, tôi có thể giúp bạn tra cứu một số văn bản pháp luật quan trọng. "
745
+ "Bạn hãy chọn văn bản muốn xem trước:"
746
+ )
747
+ clarification_options = [
748
+ {
749
+ "code": "264-QD-TW",
750
+ "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
751
+ "reason": "Quy định chung về xử lý kỷ luật đối với đảng viên vi phạm.",
752
+ },
753
+ {
754
+ "code": "QD-69-TW",
755
+ "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
756
+ "reason": "Quy định chi tiết về các hành vi vi phạm và hình thức kỷ luật.",
757
+ },
758
+ {
759
+ "code": "TT-02-CAND",
760
+ "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
761
+ "reason": "Quy định về điều lệnh, lễ tiết, tác phong trong CAND.",
762
+ },
763
+ {
764
+ "code": "__other__",
765
+ "title": "Khác",
766
+ "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
767
+ },
768
+ ]
769
+ response = {
770
+ "message": intro_message,
771
+ "intent": intent,
772
+ "confidence": confidence,
773
+ "results": [],
774
+ "count": 0,
775
+ "routing": "small_talk_offtopic_wizard",
776
+ "type": "options",
777
+ "wizard_stage": "choose_document",
778
+ "clarification": {
779
+ "message": intro_message,
780
+ "options": clarification_options,
781
+ },
782
+ "options": clarification_options,
783
+ }
784
+ else:
785
+ message = (
786
+ "Tôi có thể giúp bạn tra cứu các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên. "
787
+ "Bạn muốn tìm gì?"
788
+ )
789
+ response = {
790
+ "message": message,
791
+ "intent": intent,
792
+ "confidence": confidence,
793
+ "results": [],
794
+ "count": 0,
795
+ "routing": "small_talk",
796
+ }
797
+
798
+ else: # IntentRoute.SEARCH
799
+ # Use core chatbot search for other intents
800
+ search_result = self.search_by_intent(intent, query, limit=5)
801
+
802
+ # Generate response message
803
+ if search_result["count"] > 0:
804
+ template = self._get_response_template(intent)
805
+ message = template.format(
806
+ count=search_result["count"],
807
+ query=query
808
+ )
809
+ else:
810
+ message = f"Xin lỗi, tôi không tìm thấy thông tin liên quan đến '{query}'. Vui lòng thử lại với từ khóa khác."
811
+
812
+ response = {
813
+ "message": message,
814
+ "intent": intent,
815
+ "confidence": confidence,
816
+ "results": search_result["results"],
817
+ "count": search_result["count"],
818
+ "routing": "search"
819
+ }
820
+
821
+ if session_id and intent == "search_legal":
822
+ try:
823
+ self._last_legal_answer_by_session[session_id] = response.get("message", "") or ""
824
+ except Exception:
825
+ pass
826
+
827
+ # Mark the payload type for the frontend: answer or options (wizard)
828
+ if response.get("clarification") or response.get("type") == "options":
829
+ response.setdefault("type", "options")
830
+ else:
831
+ response.setdefault("type", "answer")
832
+
833
+ # Add session_id
834
+ if session_id:
835
+ response["session_id"] = session_id
836
+
837
+ # Save bot response to context
838
+ if session_id:
839
+ try:
840
+ bot_message = response.get("message") or response.get("clarification", {}).get("message", "")
841
+ ConversationContext.add_message(
842
+ session_id=session_id,
843
+ role="bot",
844
+ content=bot_message,
845
+ intent=intent
846
+ )
847
+ except Exception as e:
848
+ print(f"⚠️ Failed to save bot message: {e}")
849
+
850
+ self._cache_response(query, intent, response)
851
+
852
+ return response
853
+
854
+ def _run_slow_path_legal(
855
+ self,
856
+ query: str,
857
+ intent: str,
858
+ session_id: Optional[str],
859
+ route_decision: RouteDecision,
860
+ session_metadata: Optional[Dict[str, Any]] = None,
861
+ ) -> Dict[str, Any]:
862
+ """Execute Slow Path legal handler (with fast-path + structured output)."""
863
+ slow_handler = SlowPathHandler()
864
+ selected_doc_code = None
865
+ if session_metadata:
866
+ selected_doc_code = session_metadata.get("selected_document_code")
867
+ response = slow_handler.handle(
868
+ query,
869
+ intent,
870
+ session_id,
871
+ selected_document_code=selected_doc_code,
872
+ )
873
+ response.setdefault("routing", "slow_path")
874
+ response.setdefault(
875
+ "_routing",
876
+ {
877
+ "path": "slow_path",
878
+ "method": getattr(route_decision, "rationale", "router"),
879
+ "confidence": route_decision.confidence,
880
+ },
881
+ )
882
+
883
+ # Simple wizard metadata update: if we are still asking the user to choose a document,
884
+ # mark stage = choose_document; once an answer has been returned, mark stage = answer.
885
+ if session_id:
886
+ try:
887
+ if response.get("clarification") or response.get("type") == "options":
888
+ ConversationContext.update_session_metadata(
889
+ session_id,
890
+ {
891
+ "wizard_stage": "choose_document",
892
+ },
893
+ )
894
+ else:
895
+ ConversationContext.update_session_metadata(
896
+ session_id,
897
+ {
898
+ "wizard_stage": "answer",
899
+ "last_answer_type": response.get("intent"),
900
+ },
901
+ )
902
+ except Exception:
903
+ # Do not let metadata errors break the main answer flow
904
+ pass
905
+
906
+ logger.info(
907
+ "[LEGAL] Slow path response - source=%s count=%s routing=%s",
908
+ response.get("_source"),
909
+ response.get("count"),
910
+ response.get("_routing"),
911
+ )
912
+ return response
913
+
914
+ def _cache_response(self, query: str, intent: str, response: Dict[str, Any]) -> None:
915
+ """Store response in exact-match cache if eligible."""
916
+ if not self._should_cache_response(intent, response):
917
+ logger.debug(
918
+ "[CACHE] Skip storing response (intent=%s, results=%s)",
919
+ intent,
920
+ response.get("count"),
921
+ )
922
+ return
923
+ payload = copy.deepcopy(response)
924
+ payload.pop("session_id", None)
925
+ payload.pop("_cache", None)
926
+ EXACT_MATCH_CACHE.set(query, intent, payload)
927
+ logger.info(
928
+ "[CACHE] Stored response for intent=%s (results=%s, source=%s)",
929
+ intent,
930
+ response.get("count"),
931
+ response.get("_source"),
932
+ )
933
+
934
+ def _should_cache_response(self, intent: str, response: Dict[str, Any]) -> bool:
935
+ """Determine if response should be cached for exact matches."""
936
+ if response.get("clarification"):
937
+ return False
938
+ cacheable_intents = {
939
+ "search_legal",
940
+ "search_fine",
941
+ "search_procedure",
942
+ "search_office",
943
+ "search_advisory",
944
+ }
945
+ if intent not in cacheable_intents:
946
+ return False
947
+ if response.get("count", 0) <= 0:
948
+ return False
949
+ if not response.get("results"):
950
+ return False
951
+ return True
952
+
953
+ def _query_has_document_code(self, query: str) -> bool:
954
+ """
955
+ Check if the raw query string explicitly contains a known document code pattern
956
+ (e.g. '264/QĐ-TW', 'QD-69-TW', 'TT-02-CAND').
957
+ """
958
+ if not query:
959
+ return False
960
+ # Remove accents so the regex patterns stay simpler
961
+ normalized = unicodedata.normalize("NFD", query)
962
+ normalized = "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
963
+ normalized = normalized.upper()
964
+ for pattern in DOCUMENT_CODE_PATTERNS:
965
+ try:
966
+ if re.search(pattern, normalized):
967
+ return True
968
+ except re.error:
969
+ continue
970
+ return False
971
+
972
+ def _handle_legal_query(self, query: str, session_id: Optional[str] = None) -> Dict[str, Any]:
973
+ """
974
+ Handle legal document queries with RAG pipeline.
975
+
976
+ Args:
977
+ query: User query
978
+ session_id: Optional session ID
979
+
980
+ Returns:
981
+ Response dictionary
982
+ """
983
+ # Search legal sections
984
+ qs = LegalSection.objects.select_related("document").all()
985
+ text_fields = ["section_title", "section_code", "content"]
986
+ legal_sections = self._search_legal_sections(qs, query, text_fields, top_k=5)
987
+
988
+ if not legal_sections:
989
+ return {
990
+ "message": f"Xin lỗi, tôi không tìm thấy văn bản pháp luật liên quan đến '{query}'.",
991
+ "intent": "search_legal",
992
+ "confidence": 0.5,
993
+ "results": [],
994
+ "count": 0,
995
+ "routing": "search"
996
+ }
997
+
998
+ # Try LLM generation if available
999
+ if self.llm_generator and self.llm_generator.provider != "none":
1000
+ try:
1001
+ answer = self.llm_generator.generate_structured_legal_answer(
1002
+ query=query,
1003
+ documents=legal_sections,
1004
+ max_attempts=2
1005
+ )
1006
+ message = answer.summary
1007
+ except Exception as e:
1008
+ print(f"⚠️ LLM generation failed: {e}")
1009
+ message = self._format_legal_results(legal_sections, query)
1010
+ else:
1011
+ # Template-based response
1012
+ message = self._format_legal_results(legal_sections, query)
1013
+
1014
+ # Format results
1015
+ results = []
1016
+ for section in legal_sections:
1017
+ doc = section.document
1018
+ results.append({
1019
+ "type": "legal",
1020
+ "data": {
1021
+ "id": section.id,
1022
+ "section_code": section.section_code,
1023
+ "section_title": section.section_title or "",
1024
+ "content": section.content[:500] + "..." if len(section.content) > 500 else section.content,
1025
+ "excerpt": section.excerpt or "",
1026
+ "document_code": doc.code if doc else "",
1027
+ "document_title": doc.title if doc else "",
1028
+ "page_start": section.page_start,
1029
+ "page_end": section.page_end,
1030
+ "download_url": f"/api/legal-documents/{doc.id}/download/" if doc and doc.id else None,
1031
+ "source_url": doc.source_url if doc else ""
1032
+ }
1033
+ })
1034
+
1035
+ return {
1036
+ "message": message,
1037
+ "intent": "search_legal",
1038
+ "confidence": 0.9,
1039
+ "results": results,
1040
+ "count": len(results),
1041
+ "routing": "search"
1042
+ }
1043
+
1044
+ def _search_legal_sections(self, qs, query: str, text_fields: list, top_k: int = 5):
1045
+ """Search legal sections using ML search."""
1046
+ from hue_portal.core.search_ml import search_with_ml
1047
+ return search_with_ml(qs, query, text_fields, top_k=top_k, min_score=0.1)
1048
+
1049
+ def _format_legal_results(self, sections, query: str) -> str:
1050
+ """Format legal sections into response message."""
1051
+ if not sections:
1052
+ return f"Xin lỗi, tôi không tìm thấy văn bản pháp luật liên quan đến '{query}'."
1053
+
1054
+ doc = sections[0].document
1055
+ doc_info = f"{doc.code}: {doc.title}" if doc else "Văn bản pháp luật"
1056
+
1057
+ message = f"Tôi tìm thấy {len(sections)} điều khoản liên quan đến '{query}' trong {doc_info}:\n\n"
1058
+
1059
+ for i, section in enumerate(sections[:3], 1):
1060
+ section_text = f"{section.section_code}: {section.section_title or ''}\n"
1061
+ section_text += section.content[:200] + "..." if len(section.content) > 200 else section.content
1062
+ message += f"{i}. {section_text}\n\n"
1063
+
1064
+ if len(sections) > 3:
1065
+ message += f"... và {len(sections) - 3} điều khoản khác."
1066
+
1067
+ return message
1068
+
1069
+ def _get_response_template(self, intent: str) -> str:
1070
+ """Get response template for intent."""
1071
+ templates = {
1072
+ "search_fine": "Tôi tìm thấy {count} mức phạt liên quan đến '{query}':",
1073
+ "search_procedure": "Tôi tìm thấy {count} thủ tục liên quan đến '{query}':",
1074
+ "search_office": "Tôi tìm thấy {count} đơn vị liên quan đến '{query}':",
1075
+ "search_advisory": "Tôi tìm thấy {count} cảnh báo liên quan đến '{query}':",
1076
+ }
1077
+ return templates.get(intent, "Tôi tìm thấy {count} kết quả liên quan đến '{query}':")
1078
+
1079
+
1080
+ # Global chatbot instance
1081
+ _chatbot_instance = None
1082
+
1083
+
1084
+ def get_chatbot() -> Chatbot:
1085
+ """Get or create enhanced chatbot instance."""
1086
+ global _chatbot_instance
1087
+ if _chatbot_instance is None:
1088
+ _chatbot_instance = Chatbot()
1089
+ return _chatbot_instance
1090
+
1091
+
1092
+
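Reviewer note: a minimal usage sketch for the `get_chatbot()` entry point added above. It assumes a configured Django environment; the settings module path and the session id are illustrative assumptions, not values defined in this commit.

```python
# Hypothetical smoke test for the chatbot entry point; settings path and session_id
# are assumptions for illustration only.
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hue_portal.hue_portal.settings")  # assumed settings module path
django.setup()

from hue_portal.chatbot.chatbot import get_chatbot

bot = get_chatbot()
response = bot.generate_response(
    "Đảng viên vi phạm bị xử lý kỷ luật như thế nào?",
    session_id="123e4567-e89b-12d3-a456-426614174000",  # example UUID; enables wizard state in the session
)
print(response.get("type"))      # "options" while the wizard is asking, "answer" otherwise
print(response.get("routing"), response.get("intent"))
for option in response.get("options", []):  # only present on wizard-stage responses
    print("-", option.get("title"))
```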
backend/hue_portal/chatbot/context_manager.py ADDED
@@ -0,0 +1,220 @@
1
+ """
2
+ Context manager for conversation sessions and messages.
3
+ """
4
+ from typing import List, Dict, Any, Optional
5
+ from uuid import UUID
6
+ from hue_portal.core.models import ConversationSession, ConversationMessage
7
+
8
+
9
+ class ConversationContext:
10
+ """Manages conversation sessions and context."""
11
+
12
+ @staticmethod
13
+ def get_session(session_id: Optional[str] = None, user_id: Optional[str] = None) -> ConversationSession:
14
+ """
15
+ Get or create a conversation session.
16
+
17
+ Args:
18
+ session_id: Optional session ID (UUID string). If None, creates new session.
19
+ user_id: Optional user ID for tracking.
20
+
21
+ Returns:
22
+ ConversationSession instance.
23
+ """
24
+ if session_id:
25
+ try:
26
+ # Try to get existing session
27
+ session = ConversationSession.objects.get(session_id=session_id)
28
+ # Update updated_at timestamp
29
+ session.save(update_fields=["updated_at"])
30
+ return session
31
+ except ConversationSession.DoesNotExist:
32
+ # Create new session with provided session_id
33
+ return ConversationSession.objects.create(
34
+ session_id=session_id,
35
+ user_id=user_id
36
+ )
37
+ else:
38
+ # Create new session
39
+ return ConversationSession.objects.create(user_id=user_id)
40
+
41
+ @staticmethod
42
+ def add_message(
43
+ session_id: str,
44
+ role: str,
45
+ content: str,
46
+ intent: Optional[str] = None,
47
+ entities: Optional[Dict[str, Any]] = None,
48
+ metadata: Optional[Dict[str, Any]] = None
49
+ ) -> ConversationMessage:
50
+ """
51
+ Add a message to a conversation session.
52
+
53
+ Args:
54
+ session_id: Session ID (UUID string).
55
+ role: Message role ('user' or 'bot').
56
+ content: Message content.
57
+ intent: Detected intent (optional).
58
+ entities: Extracted entities (optional).
59
+ metadata: Additional metadata (optional).
60
+
61
+ Returns:
62
+ ConversationMessage instance.
63
+ """
64
+ session = ConversationContext.get_session(session_id=session_id)
65
+
66
+ return ConversationMessage.objects.create(
67
+ session=session,
68
+ role=role,
69
+ content=content,
70
+ intent=intent or "",
71
+ entities=entities or {},
72
+ metadata=metadata or {}
73
+ )
74
+
75
+ @staticmethod
76
+ def get_recent_messages(session_id: str, limit: int = 10) -> List[ConversationMessage]:
77
+ """
78
+ Get recent messages from a session.
79
+
80
+ Args:
81
+ session_id: Session ID (UUID string).
82
+ limit: Maximum number of messages to return.
83
+
84
+ Returns:
85
+ List of ConversationMessage instances, ordered by timestamp (oldest first).
86
+ """
87
+ try:
88
+ session = ConversationSession.objects.get(session_id=session_id)
89
+ return list(session.messages.all()[:limit])
90
+ except ConversationSession.DoesNotExist:
91
+ return []
92
+
93
+ @staticmethod
94
+ def get_context_summary(session_id: str, max_messages: int = 5) -> Dict[str, Any]:
95
+ """
96
+ Create a summary of conversation context.
97
+
98
+ Args:
99
+ session_id: Session ID (UUID string).
100
+ max_messages: Maximum number of messages to include in summary.
101
+
102
+ Returns:
103
+ Dictionary with context summary including:
104
+ - recent_messages: List of recent messages
105
+ - entities: Aggregated entities from conversation
106
+ - intents: List of intents mentioned
107
+ - message_count: Total number of messages
108
+ """
109
+ messages = ConversationContext.get_recent_messages(session_id, limit=max_messages)
110
+
111
+ # Aggregate entities
112
+ all_entities = {}
113
+ intents = []
114
+
115
+ for msg in messages:
116
+ if msg.entities:
117
+ for key, value in msg.entities.items():
118
+ if key not in all_entities:
119
+ all_entities[key] = []
120
+ if value not in all_entities[key]:
121
+ all_entities[key].append(value)
122
+
123
+ if msg.intent:
124
+ if msg.intent not in intents:
125
+ intents.append(msg.intent)
126
+
127
+ return {
128
+ "recent_messages": [
129
+ {
130
+ "role": msg.role,
131
+ "content": msg.content,
132
+ "intent": msg.intent,
133
+ "timestamp": msg.timestamp.isoformat()
134
+ }
135
+ for msg in messages
136
+ ],
137
+ "entities": all_entities,
138
+ "intents": intents,
139
+ "message_count": len(messages)
140
+ }
141
+
142
+ @staticmethod
143
+ def extract_entities(query: str) -> Dict[str, Any]:
144
+ """
145
+ Extract entities from a query (basic implementation).
146
+ This is a placeholder - will be enhanced by entity_extraction.py
147
+
148
+ Args:
149
+ query: User query string.
150
+
151
+ Returns:
152
+ Dictionary with extracted entities.
153
+ """
154
+ entities = {}
155
+ query_lower = query.lower()
156
+
157
+ # Basic fine code extraction (V001, V002, etc.)
158
+ import re
159
+ fine_codes = re.findall(r'\bV\d{3}\b', query, re.IGNORECASE)
160
+ if fine_codes:
161
+ entities["fine_codes"] = fine_codes
162
+
163
+ # Basic procedure keywords
164
+ procedure_keywords = ["thủ tục", "hồ sơ", "giấy tờ"]
165
+ if any(kw in query_lower for kw in procedure_keywords):
166
+ entities["has_procedure"] = True
167
+
168
+ # Basic fine keywords
169
+ fine_keywords = ["phạt", "mức phạt", "vi phạm"]
170
+ if any(kw in query_lower for kw in fine_keywords):
171
+ entities["has_fine"] = True
172
+
173
+ return entities
174
+
175
+ @staticmethod
176
+ def get_session_metadata(session_id: str) -> Dict[str, Any]:
177
+ """
178
+ Return metadata stored with the conversation session.
179
+ """
180
+ if not session_id:
181
+ return {}
182
+ try:
183
+ session = ConversationSession.objects.get(session_id=session_id)
184
+ return session.metadata or {}
185
+ except ConversationSession.DoesNotExist:
186
+ return {}
187
+
188
+ @staticmethod
189
+ def update_session_metadata(session_id: str, data: Dict[str, Any]) -> Dict[str, Any]:
190
+ """
191
+ Merge provided data into session metadata and persist.
192
+ """
193
+ if not session_id:
194
+ return {}
195
+ session = ConversationContext.get_session(session_id=session_id)
196
+ metadata = session.metadata or {}
197
+ metadata.update(data)
198
+ session.metadata = metadata
199
+ session.save(update_fields=["metadata", "updated_at"])
200
+ return metadata
201
+
202
+ @staticmethod
203
+ def clear_session_metadata_keys(session_id: str, keys: List[str]) -> Dict[str, Any]:
204
+ """
205
+ Remove specific keys from session metadata.
206
+ """
207
+ if not session_id:
208
+ return {}
209
+ session = ConversationContext.get_session(session_id=session_id)
210
+ metadata = session.metadata or {}
211
+ changed = False
212
+ for key in keys:
213
+ if key in metadata:
214
+ metadata.pop(key)
215
+ changed = True
216
+ if changed:
217
+ session.metadata = metadata
218
+ session.save(update_fields=["metadata", "updated_at"])
219
+ return metadata
220
+
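A short sketch of how the `ConversationContext` helpers above fit together. It assumes a configured Django environment with the ConversationSession/ConversationMessage migrations applied; the session id is generated just for the example.

```python
# Illustrative use of ConversationContext; requires Django to be set up first.
import uuid

from hue_portal.chatbot.context_manager import ConversationContext

session_id = str(uuid.uuid4())  # example session id

# Record one user turn and one bot turn.
ConversationContext.add_message(session_id, role="user", content="Quy định 69 nói gì về khiển trách?")
ConversationContext.add_message(session_id, role="bot", content="Các điều khoản liên quan...", intent="search_legal")

# Persist wizard state, then read it back on the next request.
ConversationContext.update_session_metadata(session_id, {"wizard_stage": "choose_topic", "selected_document_code": "QD-69-TW"})
metadata = ConversationContext.get_session_metadata(session_id)
recent = ConversationContext.get_recent_messages(session_id, limit=5)
print(metadata.get("wizard_stage"), len(recent))
```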
backend/hue_portal/chatbot/dialogue_manager.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ Dialogue management for multi-turn conversations.
3
+ """
4
+ from typing import Dict, Any, Optional, List, Tuple
5
+ from enum import Enum
6
+
7
+
8
+ class DialogueState(Enum):
9
+ """Dialogue states."""
10
+ INITIAL = "initial"
11
+ COLLECTING_INFO = "collecting_info"
12
+ CLARIFYING = "clarifying"
13
+ PROVIDING_ANSWER = "providing_answer"
14
+ FOLLOW_UP = "follow_up"
15
+ COMPLETED = "completed"
16
+
17
+
18
+ class DialogueManager:
19
+ """Manages dialogue state and multi-turn conversations."""
20
+
21
+ def __init__(self):
22
+ self.state = DialogueState.INITIAL
23
+ self.slots = {} # Slot filling for missing information
24
+ self.context_switch_detected = False
25
+
26
+ def update_state(
27
+ self,
28
+ query: str,
29
+ intent: str,
30
+ results_count: int,
31
+ confidence: float,
32
+ recent_messages: Optional[List[Dict[str, Any]]] = None
33
+ ) -> DialogueState:
34
+ """
35
+ Update dialogue state based on current query and context.
36
+
37
+ Args:
38
+ query: Current user query.
39
+ intent: Detected intent.
40
+ results_count: Number of results found.
41
+ confidence: Confidence score.
42
+ recent_messages: Recent conversation messages.
43
+
44
+ Returns:
45
+ Updated dialogue state.
46
+ """
47
+ # Detect context switching
48
+ if recent_messages and len(recent_messages) > 0:
49
+ last_intent = recent_messages[-1].get("intent")
50
+ if last_intent and last_intent != intent and intent != "greeting":
51
+ self.context_switch_detected = True
52
+ self.state = DialogueState.INITIAL
53
+ self.slots = {}
54
+ return self.state
55
+
56
+ # State transitions
57
+ if results_count == 0 and confidence < 0.5:
58
+ # No results and low confidence - need clarification
59
+ self.state = DialogueState.CLARIFYING
60
+ elif results_count > 0 and confidence >= 0.7:
61
+ # Good results - providing answer
62
+ self.state = DialogueState.PROVIDING_ANSWER
63
+ elif results_count > 0 and confidence < 0.7:
64
+ # Some results but uncertain - might need follow-up
65
+ self.state = DialogueState.FOLLOW_UP
66
+ else:
67
+ self.state = DialogueState.PROVIDING_ANSWER
68
+
69
+ return self.state
70
+
71
+ def needs_clarification(
72
+ self,
73
+ query: str,
74
+ intent: str,
75
+ results_count: int
76
+ ) -> Tuple[bool, Optional[str]]:
77
+ """
78
+ Check if clarification is needed.
79
+
80
+ Args:
81
+ query: User query.
82
+ intent: Detected intent.
83
+ results_count: Number of results.
84
+
85
+ Returns:
86
+ Tuple of (needs_clarification, clarification_message).
87
+ """
88
+ if results_count == 0:
89
+ # No results - ask for clarification
90
+ clarification_messages = {
91
+ "search_fine": "Bạn có thể cho biết cụ thể hơn về loại vi phạm không? Ví dụ: vượt đèn đỏ, không đội mũ bảo hiểm...",
92
+ "search_procedure": "Bạn muốn tìm thủ tục nào? Ví dụ: đăng ký cư trú, thủ tục ANTT...",
93
+ "search_office": "Bạn muốn tìm đơn vị nào? Ví dụ: công an phường, điểm tiếp dân...",
94
+ "search_advisory": "Bạn muốn tìm cảnh báo về chủ đề gì?",
95
+ }
96
+ message = clarification_messages.get(intent, "Bạn có thể cung cấp thêm thông tin không?")
97
+ return (True, message)
98
+
99
+ return (False, None)
100
+
101
+ def detect_missing_slots(
102
+ self,
103
+ intent: str,
104
+ query: str,
105
+ results_count: int
106
+ ) -> Dict[str, Any]:
107
+ """
108
+ Detect missing information slots.
109
+
110
+ Args:
111
+ intent: Detected intent.
112
+ query: User query.
113
+ results_count: Number of results.
114
+
115
+ Returns:
116
+ Dictionary of missing slots.
117
+ """
118
+ missing_slots = {}
119
+
120
+ if intent == "search_fine":
121
+ # Check for fine code or fine name
122
+ if "v001" not in query.lower() and "v002" not in query.lower():
123
+ if not any(kw in query.lower() for kw in ["vượt đèn đỏ", "mũ bảo hiểm", "nồng độ cồn"]):
124
+ missing_slots["fine_specification"] = True
125
+
126
+ elif intent == "search_procedure":
127
+ # Check for procedure name or domain
128
+ if not any(kw in query.lower() for kw in ["cư trú", "antt", "pccc", "đăng ký"]):
129
+ missing_slots["procedure_specification"] = True
130
+
131
+ elif intent == "search_office":
132
+ # Check for office name or location
133
+ if not any(kw in query.lower() for kw in ["phường", "huyện", "tỉnh", "điểm tiếp dân"]):
134
+ missing_slots["office_specification"] = True
135
+
136
+ return missing_slots
137
+
138
+ def handle_follow_up(
139
+ self,
140
+ query: str,
141
+ recent_messages: List[Dict[str, Any]]
142
+ ) -> Optional[str]:
143
+ """
144
+ Generate follow-up question if needed.
145
+
146
+ Args:
147
+ query: Current query.
148
+ recent_messages: Recent conversation messages.
149
+
150
+ Returns:
151
+ Follow-up question or None.
152
+ """
153
+ if not recent_messages:
154
+ return None
155
+
156
+ # Check if query is very short (likely a follow-up)
157
+ if len(query.split()) <= 3:
158
+ last_message = recent_messages[-1]
159
+ last_intent = last_message.get("intent")
160
+
161
+ if last_intent == "search_fine":
162
+ return "Bạn muốn biết thêm thông tin gì về mức phạt này? (ví dụ: điều luật, biện pháp khắc phục)"
163
+ elif last_intent == "search_procedure":
164
+ return "Bạn muốn biết thêm thông tin gì về thủ tục này? (ví dụ: hồ sơ, lệ phí, thời hạn)"
165
+
166
+ return None
167
+
168
+ def reset(self):
169
+ """Reset dialogue manager state."""
170
+ self.state = DialogueState.INITIAL
171
+ self.slots = {}
172
+ self.context_switch_detected = False
173
+
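A small, self-contained sketch of the state machine implemented above; the queries and scores are made up for illustration.

```python
# Walk DialogueManager through two typical transitions; no database access needed.
from hue_portal.chatbot.dialogue_manager import DialogueManager, DialogueState

dm = DialogueManager()

# Good results with high confidence -> PROVIDING_ANSWER
state = dm.update_state(query="mức phạt vượt đèn đỏ", intent="search_fine",
                        results_count=3, confidence=0.85)
assert state == DialogueState.PROVIDING_ANSWER

# No results and low confidence -> CLARIFYING, with a suggested clarification prompt
state = dm.update_state(query="phạt", intent="search_fine", results_count=0, confidence=0.3)
needs, prompt = dm.needs_clarification("phạt", "search_fine", results_count=0)
print(state, needs, prompt)

dm.reset()  # back to INITIAL between conversations
```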
backend/hue_portal/chatbot/document_topics.py ADDED
@@ -0,0 +1,74 @@
1
+ """
2
+ Domain-specific knowledge for clarification prompts.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import List, Dict
7
+
8
+
9
+ DOCUMENT_TOPICS: List[Dict[str, str]] = [
10
+ {
11
+ "code": "264-QD-TW",
12
+ "title": "Quy định 264/QĐ-TW (sửa đổi, bổ sung Quy định 69/QĐ-TW)",
13
+ "doc_type": "Quy định",
14
+ "summary": "Văn bản của Ban Chấp hành Trung ương về kỷ luật tổ chức đảng, thay thế quy định 69.",
15
+ "keywords": [
16
+ "264",
17
+ "quy định 264",
18
+ "qd 264",
19
+ "đảng",
20
+ "tổ chức đảng",
21
+ "kỷ luật đảng",
22
+ "ban chấp hành trung ương",
23
+ ],
24
+ },
25
+ {
26
+ "code": "QD-69-TW",
27
+ "title": "Quy định 69/QĐ-TW về kỷ luật tổ chức đảng, đảng viên vi phạm",
28
+ "doc_type": "Quy định",
29
+ "summary": "Quy định kỷ luật của Đảng ban hành năm 2022, nền tảng cho xử lý kỷ luật đảng viên.",
30
+ "keywords": [
31
+ "69",
32
+ "qd 69",
33
+ "quy định 69",
34
+ "kỷ luật đảng viên",
35
+ "kỷ luật cán bộ",
36
+ "vi phạm đảng",
37
+ ],
38
+ },
39
+ {
40
+ "code": "TT-02-CAND",
41
+ "title": "Thông tư 02/2021/TT-BCA về xử lý điều lệnh trong Công an nhân dân",
42
+ "doc_type": "Thông tư",
43
+ "summary": "Quy định xử lý vi phạm điều lệnh, hạ bậc thi đua đối với đơn vị thuộc CAND.",
44
+ "keywords": [
45
+ "thông tư 02",
46
+ "tt 02",
47
+ "điều lệnh",
48
+ "công an",
49
+ "cand",
50
+ "thi đua",
51
+ "đơn vị",
52
+ ],
53
+ },
54
+ {
55
+ "code": "TT-02-BIEN-SOAN",
56
+ "title": "Thông tư 02/2018/TT-BCA (Biên soạn) về soạn thảo văn bản",
57
+ "doc_type": "Thông tư",
58
+ "summary": "Hướng dẫn biên soạn, trình bày văn bản thuộc Bộ Công an.",
59
+ "keywords": [
60
+ "biên soạn",
61
+ "soạn thảo",
62
+ "thông tư 02 biên soạn",
63
+ ],
64
+ },
65
+ ]
66
+
67
+
68
+ def find_topic_by_code(code: str) -> Dict[str, str] | None:
69
+ code_upper = code.strip().upper()
70
+ for topic in DOCUMENT_TOPICS:
71
+ if topic["code"].upper() == code_upper:
72
+ return topic
73
+ return None
74
+
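For example, `find_topic_by_code` matches codes case-insensitively against the static catalogue above:

```python
# Quick check of the static topic catalogue defined above.
from hue_portal.chatbot.document_topics import DOCUMENT_TOPICS, find_topic_by_code

topic = find_topic_by_code("qd-69-tw")
print(topic["title"] if topic else "not found")  # Quy định 69/QĐ-TW ...
print(len(DOCUMENT_TOPICS))                      # 4 entries in this commit
```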
backend/hue_portal/chatbot/download_progress.py ADDED
@@ -0,0 +1,294 @@
1
+ """
2
+ Download progress tracker for Hugging Face models.
3
+ Tracks real-time download progress in bytes.
4
+ """
5
+ import threading
6
+ import time
7
+ from typing import Dict, Optional
8
+ from dataclasses import dataclass, field
9
+
10
+
11
+ @dataclass
12
+ class DownloadProgress:
13
+ """Track download progress for a single file."""
14
+ filename: str
15
+ total_bytes: int = 0
16
+ downloaded_bytes: int = 0
17
+ started_at: Optional[float] = None
18
+ completed_at: Optional[float] = None
19
+ speed_bytes_per_sec: float = 0.0
20
+
21
+ @property
22
+ def percentage(self) -> float:
23
+ """Calculate download percentage."""
24
+ if self.total_bytes == 0:
25
+ return 0.0
26
+ return min(100.0, (self.downloaded_bytes / self.total_bytes) * 100.0)
27
+
28
+ @property
29
+ def is_complete(self) -> bool:
30
+ """Check if download is complete."""
31
+ return self.total_bytes > 0 and self.downloaded_bytes >= self.total_bytes
32
+
33
+ @property
34
+ def elapsed_time(self) -> float:
35
+ """Get elapsed time in seconds."""
36
+ if self.started_at is None:
37
+ return 0.0
38
+ end_time = self.completed_at or time.time()
39
+ return end_time - self.started_at
40
+
41
+
42
+ @dataclass
43
+ class ModelDownloadProgress:
44
+ """Track overall download progress for a model."""
45
+ model_path: str
46
+ files: Dict[str, DownloadProgress] = field(default_factory=dict)
47
+ started_at: Optional[float] = None
48
+ completed_at: Optional[float] = None
49
+
50
+ def update_file(self, filename: str, downloaded: int, total: int):
51
+ """Update progress for a specific file."""
52
+ if filename not in self.files:
53
+ self.files[filename] = DownloadProgress(
54
+ filename=filename,
55
+ started_at=time.time()
56
+ )
57
+ if self.started_at is None:
58
+ self.started_at = time.time()
59
+
60
+ file_progress = self.files[filename]
61
+ file_progress.downloaded_bytes = downloaded
62
+ file_progress.total_bytes = total
63
+
64
+ # Calculate speed
65
+ if file_progress.started_at:
66
+ elapsed = time.time() - file_progress.started_at
67
+ if elapsed > 0:
68
+ file_progress.speed_bytes_per_sec = downloaded / elapsed
69
+
70
+ # Mark as complete
71
+ if total > 0 and downloaded >= total:
72
+ file_progress.completed_at = time.time()
73
+
74
+ def complete_file(self, filename: str):
75
+ """Mark a file as complete."""
76
+ if filename in self.files:
77
+ self.files[filename].completed_at = time.time()
78
+
79
+ @property
80
+ def total_bytes(self) -> int:
81
+ """Get total bytes across all files."""
82
+ return sum(f.total_bytes for f in self.files.values())
83
+
84
+ @property
85
+ def downloaded_bytes(self) -> int:
86
+ """Get downloaded bytes across all files."""
87
+ return sum(f.downloaded_bytes for f in self.files.values())
88
+
89
+ @property
90
+ def percentage(self) -> float:
91
+ """Calculate overall download percentage."""
92
+ total = self.total_bytes
93
+ if total == 0:
94
+ # If no total yet, count completed files
95
+ if len(self.files) == 0:
96
+ return 0.0
97
+ completed = sum(1 for f in self.files.values() if f.is_complete)
98
+ return (completed / len(self.files)) * 100.0
99
+ return min(100.0, (self.downloaded_bytes / total) * 100.0)
100
+
101
+ @property
102
+ def is_complete(self) -> bool:
103
+ """Check if all files are downloaded."""
104
+ if len(self.files) == 0:
105
+ return False
106
+ return all(f.is_complete for f in self.files.values())
107
+
108
+ @property
109
+ def speed_bytes_per_sec(self) -> float:
110
+ """Get overall download speed."""
111
+ total_speed = sum(f.speed_bytes_per_sec for f in self.files.values() if f.started_at)
112
+ return total_speed
113
+
114
+ @property
115
+ def elapsed_time(self) -> float:
116
+ """Get elapsed time in seconds."""
117
+ if self.started_at is None:
118
+ return 0.0
119
+ end_time = self.completed_at or time.time()
120
+ return end_time - self.started_at
121
+
122
+ def to_dict(self) -> Dict:
123
+ """Convert to dictionary for JSON serialization."""
124
+ return {
125
+ "model_path": self.model_path,
126
+ "total_bytes": self.total_bytes,
127
+ "downloaded_bytes": self.downloaded_bytes,
128
+ "percentage": round(self.percentage, 2),
129
+ "speed_bytes_per_sec": round(self.speed_bytes_per_sec, 2),
130
+ "speed_mb_per_sec": round(self.speed_bytes_per_sec / (1024 * 1024), 2),
131
+ "elapsed_time": round(self.elapsed_time, 2),
132
+ "is_complete": self.is_complete,
133
+ "files_count": len(self.files),
134
+ "files_completed": sum(1 for f in self.files.values() if f.is_complete),
135
+ "files": {
136
+ name: {
137
+ "filename": f.filename,
138
+ "total_bytes": f.total_bytes,
139
+ "downloaded_bytes": f.downloaded_bytes,
140
+ "percentage": round(f.percentage, 2),
141
+ "speed_mb_per_sec": round(f.speed_bytes_per_sec / (1024 * 1024), 2),
142
+ "is_complete": f.is_complete
143
+ }
144
+ for name, f in self.files.items()
145
+ }
146
+ }
147
+
148
+
149
+ class ProgressTracker:
150
+ """Thread-safe progress tracker for multiple models."""
151
+
152
+ def __init__(self):
153
+ self._progress: Dict[str, ModelDownloadProgress] = {}
154
+ self._lock = threading.Lock()
155
+
156
+ def get_or_create(self, model_path: str) -> ModelDownloadProgress:
157
+ """Get or create progress tracker for a model."""
158
+ with self._lock:
159
+ if model_path not in self._progress:
160
+ self._progress[model_path] = ModelDownloadProgress(model_path=model_path)
161
+ return self._progress[model_path]
162
+
163
+ def get(self, model_path: str) -> Optional[ModelDownloadProgress]:
164
+ """Get progress tracker for a model."""
165
+ with self._lock:
166
+ return self._progress.get(model_path)
167
+
168
+ def update(self, model_path: str, filename: str, downloaded: int, total: int):
169
+ """Update download progress for a file."""
170
+ progress = self.get_or_create(model_path)
171
+ progress.update_file(filename, downloaded, total)
172
+
173
+ def complete_file(self, model_path: str, filename: str):
174
+ """Mark a file as complete."""
175
+ progress = self.get(model_path)
176
+ if progress:
177
+ progress.complete_file(filename)
178
+
179
+ def complete_model(self, model_path: str):
180
+ """Mark entire model download as complete."""
181
+ progress = self.get(model_path)
182
+ if progress:
183
+ progress.completed_at = time.time()
184
+
185
+ def get_all(self) -> Dict[str, Dict]:
186
+ """Get all progress as dictionary."""
187
+ with self._lock:
188
+ return {
189
+ path: prog.to_dict()
190
+ for path, prog in self._progress.items()
191
+ }
192
+
193
+ def get_model_progress(self, model_path: str) -> Optional[Dict]:
194
+ """Get progress for a specific model."""
195
+ progress = self.get(model_path)
196
+ if progress:
197
+ return progress.to_dict()
198
+ return None
199
+
200
+
201
+ # Global progress tracker instance
202
+ _global_tracker = ProgressTracker()
203
+
204
+
205
+ def get_progress_tracker() -> ProgressTracker:
206
+ """Get global progress tracker instance."""
207
+ return _global_tracker
208
+
209
+
210
+ def create_progress_callback(model_path: str):
211
+ """
212
+ Create a progress callback for huggingface_hub downloads.
213
+
214
+ Usage:
215
+ from huggingface_hub import snapshot_download
216
+ callback = create_progress_callback("Qwen/Qwen2.5-32B-Instruct")
217
+ snapshot_download(repo_id=model_path, resume_download=True,
218
+ tqdm_class=callback)
219
+ """
220
+ tracker = get_progress_tracker()
221
+
222
+ class ProgressCallback:
223
+ """Progress callback for tqdm."""
224
+
225
+ def __init__(self, *args, **kwargs):
226
+ # Store tqdm arguments but don't initialize yet
227
+ self.tqdm_args = args
228
+ self.tqdm_kwargs = kwargs
229
+ self.current_file = None
230
+
231
+ def __call__(self, *args, **kwargs):
232
+ # This will be called by huggingface_hub
233
+ # We'll intercept the progress updates
234
+ pass
235
+
236
+ def update(self, n: int = 1):
237
+ """Update progress."""
238
+ if self.current_file:
239
+ # Get current progress from tqdm
240
+ if hasattr(self, 'n'):
241
+ downloaded = self.n
242
+ else:
243
+ downloaded = n
244
+ if hasattr(self, 'total'):
245
+ total = self.total
246
+ else:
247
+ total = 0
248
+ tracker.update(model_path, self.current_file, downloaded, total)
249
+
250
+ def set_description(self, desc: str):
251
+ """Set description (filename)."""
252
+ # Extract filename from description
253
+ if desc:
254
+ self.current_file = desc.split()[-1] if ' ' in desc else desc
255
+
256
+ def close(self):
257
+ """Close progress bar."""
258
+ if self.current_file:
259
+ tracker.complete_file(model_path, self.current_file)
260
+
261
+ return ProgressCallback
262
+
263
+
264
+ def create_hf_progress_callback(model_path: str):
265
+ """
266
+ Create a progress callback compatible with huggingface_hub.
267
+ Returns a function that can be used with tqdm.
268
+ """
269
+ tracker = get_progress_tracker()
270
+ current_file = [None] # Use list to allow modification in nested function
271
+
272
+ def progress_callback(tqdm_bar):
273
+ """Progress callback function."""
274
+ if tqdm_bar.desc:
275
+ # Extract filename from description
276
+ filename = tqdm_bar.desc.split()[-1] if ' ' in tqdm_bar.desc else tqdm_bar.desc
277
+ if filename != current_file[0]:
278
+ current_file[0] = filename
279
+ if current_file[0] not in tracker.get_or_create(model_path).files:
280
+ tracker.get_or_create(model_path).files[current_file[0]] = DownloadProgress(
281
+ filename=current_file[0],
282
+ started_at=time.time()
283
+ )
284
+
285
+ if current_file[0]:
286
+ downloaded = getattr(tqdm_bar, 'n', 0)
287
+ total = getattr(tqdm_bar, 'total', 0)
288
+ tracker.update(model_path, current_file[0], downloaded, total)
289
+
290
+ return progress_callback
291
+
292
+
293
+
294
+
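Not part of the commit: a minimal usage sketch of the tracker above, assuming the module is importable as hue_portal.chatbot.download_progress (the path implied by its location). Values are illustrative only; in practice the huggingface_hub callback drives update().

from hue_portal.chatbot.download_progress import get_progress_tracker

tracker = get_progress_tracker()  # process-wide singleton defined above
# Feed a progress update by hand (normally done by the download callback).
tracker.update("Qwen/Qwen2.5-7B-Instruct", "model-00001.safetensors",
               downloaded=1_000_000, total=4_000_000)
snapshot = tracker.get_model_progress("Qwen/Qwen2.5-7B-Instruct")
if snapshot:
    print(snapshot)           # dict built by ModelDownloadProgress.to_dict()
print(tracker.get_all())      # progress for every tracked model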
backend/hue_portal/chatbot/dual_path_router.py ADDED
@@ -0,0 +1,274 @@
1
+ """
2
+ Dual-Path RAG Router - Routes queries to Fast Path (golden dataset) or Slow Path (full RAG).
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import re
7
+ import unicodedata
8
+ from dataclasses import dataclass
9
+ from typing import Dict, Optional, List, Tuple
10
+ import numpy as np
11
+ from django.db.models import Q
12
+
13
+ from hue_portal.core.models import GoldenQuery
14
+ from hue_portal.core.embeddings import get_embedding_model
15
+
16
+
17
+ @dataclass
18
+ class RouteDecision:
19
+ """Decision from Dual-Path Router."""
20
+ path: str # "fast_path" or "slow_path"
21
+ method: str # "keyword" or "llm" or "similarity" or "default"
22
+ confidence: float
23
+ matched_golden_query_id: Optional[int] = None
24
+ similarity_score: Optional[float] = None
25
+ intent: Optional[str] = None
26
+ rationale: str = ""
27
+
28
+
29
+ class KeywordRouter:
30
+ """Fast keyword-based router to match queries against golden dataset."""
31
+
32
+ def __init__(self):
33
+ self._normalize_cache = {}
34
+
35
+ def _normalize_query(self, query: str) -> str:
36
+ """Normalize query for matching (lowercase, remove accents, extra spaces)."""
37
+ if query in self._normalize_cache:
38
+ return self._normalize_cache[query]
39
+
40
+ normalized = query.lower().strip()
41
+ # Remove accents for accent-insensitive matching
42
+ normalized = unicodedata.normalize("NFD", normalized)
43
+ normalized = "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
44
+ # Remove extra spaces
45
+ normalized = re.sub(r'\s+', ' ', normalized).strip()
46
+
47
+ self._normalize_cache[query] = normalized
48
+ return normalized
49
+
50
+ def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
51
+ """
52
+ Try to match query against golden dataset using keyword matching.
53
+
54
+ Returns:
55
+ RouteDecision with path="fast_path" if match found, else path="slow_path"
56
+ """
57
+ query_normalized = self._normalize_query(query)
58
+
59
+ # Try exact match first (fastest)
60
+ try:
61
+ golden_query = GoldenQuery.objects.get(
62
+ query_normalized=query_normalized,
63
+ is_active=True
64
+ )
65
+ return RouteDecision(
66
+ path="fast_path",
67
+ method="keyword",
68
+ confidence=1.0,
69
+ matched_golden_query_id=golden_query.id,
70
+ intent=intent,
71
+ rationale="exact_match"
72
+ )
73
+ except (GoldenQuery.DoesNotExist, GoldenQuery.MultipleObjectsReturned):
74
+ pass
75
+
76
+ # Try fuzzy match: check if query contains golden query or vice versa
77
+ # This handles variations like "mức phạt vượt đèn đỏ" vs "vượt đèn đỏ phạt bao nhiêu"
78
+ try:
79
+ # Find golden queries with same intent
80
+ golden_queries = GoldenQuery.objects.filter(
81
+ intent=intent,
82
+ is_active=True
83
+ )[:50] # Limit to avoid too many comparisons
84
+
85
+ for gq in golden_queries:
86
+ gq_normalized = self._normalize_query(gq.query)
87
+
88
+ # Check if query is substring of golden query or vice versa
89
+ if (query_normalized in gq_normalized or
90
+ gq_normalized in query_normalized):
91
+ # Calculate similarity (simple Jaccard similarity)
92
+ query_words = set(query_normalized.split())
93
+ gq_words = set(gq_normalized.split())
94
+ if query_words and gq_words:
95
+ similarity = len(query_words & gq_words) / len(query_words | gq_words)
96
+ if similarity >= 0.7: # 70% word overlap
97
+ return RouteDecision(
98
+ path="fast_path",
99
+ method="keyword",
100
+ confidence=similarity,
101
+ matched_golden_query_id=gq.id,
102
+ similarity_score=similarity,
103
+ intent=intent,
104
+ rationale="fuzzy_match"
105
+ )
106
+ except Exception:
107
+ pass
108
+
109
+ # No match found
110
+ return RouteDecision(
111
+ path="slow_path",
112
+ method="keyword",
113
+ confidence=confidence,
114
+ intent=intent,
115
+ rationale="no_keyword_match"
116
+ )
117
+
118
+
119
+ class DualPathRouter:
120
+ """Main router that decides Fast Path vs Slow Path using hybrid approach."""
121
+
122
+ def __init__(self, similarity_threshold: float = 0.85):
123
+ """
124
+ Initialize Dual-Path Router.
125
+
126
+ Args:
127
+ similarity_threshold: Minimum similarity score for semantic matching (default: 0.85)
128
+ """
129
+ self.keyword_router = KeywordRouter()
130
+ self.llm_router = None # Lazy load if needed
131
+ self.similarity_threshold = similarity_threshold
132
+ self._embedding_model = None
133
+
134
+ def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
135
+ """
136
+ Route query to Fast Path or Slow Path.
137
+
138
+ Args:
139
+ query: User query string.
140
+ intent: Detected intent.
141
+ confidence: Intent classification confidence.
142
+
143
+ Returns:
144
+ RouteDecision with path, method, and matched golden query ID if applicable.
145
+ """
146
+ # Step 1: Keyword-based routing (fastest, ~1-5ms)
147
+ keyword_decision = self.keyword_router.route(query, intent, confidence)
148
+ if keyword_decision.path == "fast_path":
149
+ return keyword_decision
150
+
151
+ # Step 2: Semantic similarity search in golden dataset (~50-100ms)
152
+ similarity_match = self._find_similar_golden_query(query, intent)
153
+ if similarity_match and similarity_match['score'] >= self.similarity_threshold:
154
+ return RouteDecision(
155
+ path="fast_path",
156
+ method="similarity",
157
+ confidence=similarity_match['score'],
158
+ matched_golden_query_id=similarity_match['id'],
159
+ similarity_score=similarity_match['score'],
160
+ intent=intent,
161
+ rationale="semantic_similarity"
162
+ )
163
+
164
+ # Step 3: LLM router fallback (for edge cases, ~100-200ms)
165
+ # Only use if confidence is low (uncertain intent)
166
+ if confidence < 0.7:
167
+ llm_decision = self._llm_route(query, intent)
168
+ if llm_decision and llm_decision.path == "fast_path":
169
+ return llm_decision
170
+
171
+ # Default: Slow Path (full RAG pipeline)
172
+ return RouteDecision(
173
+ path="slow_path",
174
+ method="default",
175
+ confidence=confidence,
176
+ intent=intent,
177
+ rationale="no_fast_path_match"
178
+ )
179
+
180
+ def _find_similar_golden_query(self, query: str, intent: str) -> Optional[Dict]:
181
+ """
182
+ Find similar query in golden dataset using semantic search.
183
+
184
+ Args:
185
+ query: User query.
186
+ intent: Detected intent.
187
+
188
+ Returns:
189
+ Dict with 'id' and 'score' if match found, None otherwise.
190
+ """
191
+ try:
192
+ # Get active golden queries with same intent
193
+ golden_queries = list(
194
+ GoldenQuery.objects.filter(
195
+ intent=intent,
196
+ is_active=True,
197
+ query_embedding__isnull=False
198
+ )[:100] # Limit for performance
199
+ )
200
+
201
+ if not golden_queries:
202
+ return None
203
+
204
+ # Get embedding model
205
+ embedding_model = self._get_embedding_model()
206
+ if not embedding_model:
207
+ return None
208
+
209
+ # Generate query embedding
210
+ query_embedding = embedding_model.encode(query, convert_to_numpy=True)
211
+ query_embedding = query_embedding / np.linalg.norm(query_embedding) # Normalize
212
+
213
+ # Calculate similarities
214
+ best_match = None
215
+ best_score = 0.0
216
+
217
+ for gq in golden_queries:
218
+ if not gq.query_embedding:
219
+ continue
220
+
221
+ # Load golden query embedding
222
+ gq_embedding = np.array(gq.query_embedding)
223
+ if len(gq_embedding) == 0:
224
+ continue
225
+
226
+ # Normalize
227
+ gq_embedding = gq_embedding / np.linalg.norm(gq_embedding)
228
+
229
+ # Calculate cosine similarity
230
+ similarity = float(np.dot(query_embedding, gq_embedding))
231
+
232
+ if similarity > best_score:
233
+ best_score = similarity
234
+ best_match = gq.id
235
+
236
+ if best_match and best_score >= self.similarity_threshold:
237
+ return {
238
+ 'id': best_match,
239
+ 'score': best_score
240
+ }
241
+
242
+ return None
243
+
244
+ except Exception as e:
245
+ # Log error but don't fail
246
+ import logging
247
+ logger = logging.getLogger(__name__)
248
+ logger.warning(f"Error in semantic similarity search: {e}")
249
+ return None
250
+
251
+ def _get_embedding_model(self):
252
+ """Lazy load embedding model."""
253
+ if self._embedding_model is None:
254
+ self._embedding_model = get_embedding_model()
255
+ return self._embedding_model
256
+
257
+ def _llm_route(self, query: str, intent: str) -> Optional[RouteDecision]:
258
+ """
259
+ Use LLM to decide routing (optional, for edge cases).
260
+
261
+ This is a fallback for low-confidence queries where keyword and similarity
262
+ didn't find a match, but LLM might recognize it as a common query.
263
+
264
+ Args:
265
+ query: User query.
266
+ intent: Detected intent.
267
+
268
+ Returns:
269
+ RouteDecision if LLM finds a match, None otherwise.
270
+ """
271
+ # For now, return None (LLM routing can be implemented later if needed)
272
+ # This would require a small LLM (7B) to classify if query matches golden dataset
273
+ return None
274
+
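Not part of the commit: a sketch of how the router above might be called from the chatbot pipeline. This is hypothetical glue code; a configured Django app with GoldenQuery rows and the embedding model is assumed.

from hue_portal.chatbot.dual_path_router import DualPathRouter

router = DualPathRouter(similarity_threshold=0.85)
decision = router.route("vượt đèn đỏ phạt bao nhiêu", intent="search_fine", confidence=0.92)
if decision.path == "fast_path":
    # keyword or semantic hit against the golden dataset
    print("golden query:", decision.matched_golden_query_id, "via", decision.method)
else:
    print("slow path:", decision.rationale)  # fall through to the full RAG pipeline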
backend/hue_portal/chatbot/entity_extraction.py ADDED
@@ -0,0 +1,395 @@
1
+ """
2
+ Entity extraction utilities for extracting fine codes, procedure names, and resolving pronouns.
3
+ """
4
+ import re
5
+ from typing import List, Dict, Any, Optional, Tuple
6
+ from hue_portal.core.models import Fine, Procedure, Office
7
+
8
+
9
+ def extract_fine_code(text: str) -> Optional[str]:
10
+ """
11
+ Extract fine code (V001, V002, etc.) from text.
12
+
13
+ Args:
14
+ text: Input text.
15
+
16
+ Returns:
17
+ Fine code string or None if not found.
18
+ """
19
+ # Pattern: V followed by 3 digits
20
+ pattern = r'\bV\d{3}\b'
21
+ matches = re.findall(pattern, text, re.IGNORECASE)
22
+ if matches:
23
+ return matches[0].upper()
24
+ return None
25
+
26
+
27
+ def extract_procedure_name(text: str) -> Optional[str]:
28
+ """
29
+ Extract procedure name from text by matching against database.
30
+
31
+ Args:
32
+ text: Input text.
33
+
34
+ Returns:
35
+ Procedure name or None if not found.
36
+ """
37
+ text_lower = text.lower()
38
+
39
+ # Get all procedures and check for matches
40
+ procedures = Procedure.objects.all()
41
+ for procedure in procedures:
42
+ procedure_title_lower = procedure.title.lower()
43
+ # Check if procedure title appears in text
44
+ if procedure_title_lower in text_lower or text_lower in procedure_title_lower:
45
+ return procedure.title
46
+
47
+ return None
48
+
49
+
50
+ def extract_office_name(text: str) -> Optional[str]:
51
+ """
52
+ Extract office/unit name from text by matching against database.
53
+
54
+ Args:
55
+ text: Input text.
56
+
57
+ Returns:
58
+ Office name or None if not found.
59
+ """
60
+ text_lower = text.lower()
61
+
62
+ # Get all offices and check for matches
63
+ offices = Office.objects.all()
64
+ for office in offices:
65
+ office_name_lower = office.unit_name.lower()
66
+ # Check if office name appears in text
67
+ if office_name_lower in text_lower or text_lower in office_name_lower:
68
+ return office.unit_name
69
+
70
+ return None
71
+
72
+
73
+ def extract_reference_pronouns(text: str, context: Optional[List[Dict[str, Any]]] = None) -> List[str]:
74
+ """
75
+ Extract reference pronouns from text.
76
+
77
+ Args:
78
+ text: Input text.
79
+ context: Optional context from recent messages.
80
+
81
+ Returns:
82
+ List of pronouns found.
83
+ """
84
+ # Vietnamese reference pronouns
85
+ pronouns = [
86
+ "cái đó", "cái này", "cái kia",
87
+ "như vậy", "như thế",
88
+ "thủ tục đó", "thủ tục này",
89
+ "mức phạt đó", "mức phạt này",
90
+ "đơn vị đó", "đơn vị này",
91
+ "nó", "đó", "này", "kia"
92
+ ]
93
+
94
+ text_lower = text.lower()
95
+ found_pronouns = []
96
+
97
+ for pronoun in pronouns:
98
+ if pronoun in text_lower:
99
+ found_pronouns.append(pronoun)
100
+
101
+ return found_pronouns
102
+
103
+
104
+ def enhance_query_with_context(query: str, recent_messages: List[Dict[str, Any]]) -> str:
105
+ """
106
+ Enhance query with entities from conversation context.
107
+ This is more comprehensive than resolve_pronouns - it adds context even when query already has keywords.
108
+
109
+ Args:
110
+ query: Current query.
111
+ recent_messages: List of recent messages with role, content, intent, entities.
112
+
113
+ Returns:
114
+ Enhanced query with context entities added.
115
+ """
116
+ if not recent_messages:
117
+ return query
118
+
119
+ # Collect entities from recent messages (reverse order - most recent first)
120
+ entities_found = {}
121
+
122
+ for msg in reversed(recent_messages):
123
+ # Check message content for entities
124
+ content = msg.get("content", "")
125
+
126
+ # Extract document code (highest priority for legal queries)
127
+ document_code = extract_document_code(content)
128
+ if document_code and "document_code" not in entities_found:
129
+ entities_found["document_code"] = document_code
130
+
131
+ # Extract fine code
132
+ fine_code = extract_fine_code(content)
133
+ if fine_code and "fine_code" not in entities_found:
134
+ entities_found["fine_code"] = fine_code
135
+
136
+ # Extract procedure name
137
+ procedure_name = extract_procedure_name(content)
138
+ if procedure_name and "procedure_name" not in entities_found:
139
+ entities_found["procedure_name"] = procedure_name
140
+
141
+ # Extract office name
142
+ office_name = extract_office_name(content)
143
+ if office_name and "office_name" not in entities_found:
144
+ entities_found["office_name"] = office_name
145
+
146
+ # Check entities field
147
+ msg_entities = msg.get("entities", {})
148
+ for key, value in msg_entities.items():
149
+ if key not in entities_found:
150
+ entities_found[key] = value
151
+
152
+ # Check intent to infer entity type
153
+ intent = msg.get("intent", "")
154
+ if intent == "search_fine" and "fine_name" not in entities_found:
155
+ # Try to extract fine name from content
156
+ fine_keywords = ["vượt đèn đỏ", "mũ bảo hiểm", "nồng độ cồn", "t��c độ"]
157
+ for keyword in fine_keywords:
158
+ if keyword in content.lower():
159
+ entities_found["fine_name"] = keyword
160
+ break
161
+
162
+ if intent == "search_procedure" and "procedure_name" not in entities_found:
163
+ procedure_keywords = ["đăng ký", "thủ tục", "cư trú", "antt", "pccc"]
164
+ for keyword in procedure_keywords:
165
+ if keyword in content.lower():
166
+ entities_found["procedure_name"] = keyword
167
+ break
168
+
169
+ if intent == "search_legal" and "document_code" not in entities_found:
170
+ # Try to extract document code from content if not already found
171
+ doc_code = extract_document_code(content)
172
+ if doc_code:
173
+ entities_found["document_code"] = doc_code
174
+
175
+ # Enhance query with context entities
176
+ enhanced_parts = [query]
177
+ query_lower = query.lower()
178
+
179
+ # If query mentions a document but doesn't have the code, add it from context
180
+ if "thông tư" in query_lower or "quyết định" in query_lower or "quy định" in query_lower:
181
+ if "document_code" in entities_found:
182
+ doc_code = entities_found["document_code"]
183
+ # Only add if not already in query
184
+ if doc_code.lower() not in query_lower:
185
+ enhanced_parts.append(doc_code)
186
+
187
+ # Add document code if intent is legal and code is in context
188
+ # This helps with follow-up questions like "nói rõ hơn về thông tư 02"
189
+ if "document_code" in entities_found:
190
+ doc_code = entities_found["document_code"]
191
+ if doc_code.lower() not in query_lower:
192
+ # Add document code to enhance search
193
+ enhanced_parts.append(doc_code)
194
+
195
+ return " ".join(enhanced_parts)
196
+
197
+
198
+ def resolve_pronouns(query: str, recent_messages: List[Dict[str, Any]]) -> str:
199
+ """
200
+ Resolve pronouns in query by replacing them with actual entities from context.
201
+ This is a simpler version that only handles pronoun replacement.
202
+ For comprehensive context enhancement, use enhance_query_with_context().
203
+
204
+ Args:
205
+ query: Current query with pronouns.
206
+ recent_messages: List of recent messages with role, content, intent, entities.
207
+
208
+ Returns:
209
+ Enhanced query with pronouns resolved.
210
+ """
211
+ if not recent_messages:
212
+ return query
213
+
214
+ # Check for pronouns
215
+ pronouns = extract_reference_pronouns(query)
216
+ if not pronouns:
217
+ return query
218
+
219
+ # Look for entities in recent messages (reverse order - most recent first)
220
+ resolved_query = query
221
+ entities_found = {}
222
+
223
+ for msg in reversed(recent_messages):
224
+ # Check message content for entities
225
+ content = msg.get("content", "")
226
+
227
+ # Extract fine code
228
+ fine_code = extract_fine_code(content)
229
+ if fine_code and "fine_code" not in entities_found:
230
+ entities_found["fine_code"] = fine_code
231
+
232
+ # Extract procedure name
233
+ procedure_name = extract_procedure_name(content)
234
+ if procedure_name and "procedure_name" not in entities_found:
235
+ entities_found["procedure_name"] = procedure_name
236
+
237
+ # Extract office name
238
+ office_name = extract_office_name(content)
239
+ if office_name and "office_name" not in entities_found:
240
+ entities_found["office_name"] = office_name
241
+
242
+ # Extract document code
243
+ document_code = extract_document_code(content)
244
+ if document_code and "document_code" not in entities_found:
245
+ entities_found["document_code"] = document_code
246
+
247
+ # Check entities field
248
+ msg_entities = msg.get("entities", {})
249
+ for key, value in msg_entities.items():
250
+ if key not in entities_found:
251
+ entities_found[key] = value
252
+
253
+ # Check intent to infer entity type
254
+ intent = msg.get("intent", "")
255
+ if intent == "search_fine" and "fine_name" not in entities_found:
256
+ fine_keywords = ["vượt đèn đỏ", "mũ bảo hiểm", "nồng độ cồn", "tốc độ"]
257
+ for keyword in fine_keywords:
258
+ if keyword in content.lower():
259
+ entities_found["fine_name"] = keyword
260
+ break
261
+
262
+ if intent == "search_procedure" and "procedure_name" not in entities_found:
263
+ procedure_keywords = ["đăng ký", "thủ tục", "cư trú", "antt", "pccc"]
264
+ for keyword in procedure_keywords:
265
+ if keyword in content.lower():
266
+ entities_found["procedure_name"] = keyword
267
+ break
268
+
269
+ # Replace pronouns with entities
270
+ query_lower = query.lower()
271
+
272
+ # Replace "cái đó", "cái này", "nó" with most relevant entity
273
+ if any(pronoun in query_lower for pronoun in ["cái đó", "cái này", "nó", "đó"]):
274
+ if "document_code" in entities_found:
275
+ resolved_query = re.sub(
276
+ r'\b(cái đó|cái này|nó|đó)\b',
277
+ entities_found["document_code"],
278
+ resolved_query,
279
+ flags=re.IGNORECASE
280
+ )
281
+ elif "fine_name" in entities_found:
282
+ resolved_query = re.sub(
283
+ r'\b(cái đó|cái này|nó|đó)\b',
284
+ entities_found["fine_name"],
285
+ resolved_query,
286
+ flags=re.IGNORECASE
287
+ )
288
+ elif "procedure_name" in entities_found:
289
+ resolved_query = re.sub(
290
+ r'\b(cái đó|cái này|nó|đó)\b',
291
+ entities_found["procedure_name"],
292
+ resolved_query,
293
+ flags=re.IGNORECASE
294
+ )
295
+ elif "office_name" in entities_found:
296
+ resolved_query = re.sub(
297
+ r'\b(cái đó|cái này|nó|đó)\b',
298
+ entities_found["office_name"],
299
+ resolved_query,
300
+ flags=re.IGNORECASE
301
+ )
302
+
303
+ # Replace "thủ tục đó", "thủ tục này" with procedure name
304
+ if "thủ tục" in query_lower and "procedure_name" in entities_found:
305
+ resolved_query = re.sub(
306
+ r'\bthủ tục (đó|này)\b',
307
+ entities_found["procedure_name"],
308
+ resolved_query,
309
+ flags=re.IGNORECASE
310
+ )
311
+
312
+ # Replace "mức phạt đó", "mức phạt này" with fine name
313
+ if "mức phạt" in query_lower and "fine_name" in entities_found:
314
+ resolved_query = re.sub(
315
+ r'\bmức phạt (đó|này)\b',
316
+ entities_found["fine_name"],
317
+ resolved_query,
318
+ flags=re.IGNORECASE
319
+ )
320
+
321
+ return resolved_query
322
+
323
+
324
+ def extract_document_code(text: str) -> Optional[str]:
325
+ """
326
+ Extract legal document code from text (e.g., "thông tư 02", "quyết định 264").
327
+
328
+ Args:
329
+ text: Input text.
330
+
331
+ Returns:
332
+ Document code string or None if not found.
333
+ """
334
+ # Patterns for legal document codes
335
+ patterns = [
336
+ r'\bthông tư\s+(\d+[-\w]*)',
337
+ r'\btt\s+(\d+[-\w]*)',
338
+ r'\bquyết định\s+(\d+[-\w]*)',
339
+ r'\bqd\s+(\d+[-\w]*)',
340
+ r'\bquy định\s+(\d+[-\w]*)',
341
+ r'\b(\d+[-\w]*)\s*[-/]\s*QĐ[-/]TW',
342
+ r'\b(\d+[-\w]*)\s*[-/]\s*TT',
343
+ ]
344
+
345
+ text_lower = text.lower()
346
+ for pattern in patterns:
347
+ matches = re.findall(pattern, text_lower, re.IGNORECASE)
348
+ if matches:
349
+ # Return the full match with document type
350
+ full_match = re.search(pattern, text_lower, re.IGNORECASE)
351
+ if full_match:
352
+ return full_match.group(0)
353
+
354
+ return None
355
+
356
+
357
+ def extract_all_entities(text: str) -> Dict[str, Any]:
358
+ """
359
+ Extract all entities from text.
360
+
361
+ Args:
362
+ text: Input text.
363
+
364
+ Returns:
365
+ Dictionary with all extracted entities.
366
+ """
367
+ entities = {}
368
+
369
+ # Extract fine code
370
+ fine_code = extract_fine_code(text)
371
+ if fine_code:
372
+ entities["fine_code"] = fine_code
373
+
374
+ # Extract procedure name
375
+ procedure_name = extract_procedure_name(text)
376
+ if procedure_name:
377
+ entities["procedure_name"] = procedure_name
378
+
379
+ # Extract office name
380
+ office_name = extract_office_name(text)
381
+ if office_name:
382
+ entities["office_name"] = office_name
383
+
384
+ # Extract document code
385
+ document_code = extract_document_code(text)
386
+ if document_code:
387
+ entities["document_code"] = document_code
388
+
389
+ # Extract pronouns
390
+ pronouns = extract_reference_pronouns(text)
391
+ if pronouns:
392
+ entities["pronouns"] = pronouns
393
+
394
+ return entities
395
+
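Not part of the commit: a small sketch of the extraction helpers above. extract_procedure_name and extract_office_name query the database, so a configured Django environment is assumed; the literal queries and expected outputs are illustrative.

from hue_portal.chatbot.entity_extraction import extract_all_entities, resolve_pronouns

entities = extract_all_entities("Mức phạt V012 theo thông tư 02 là bao nhiêu?")
print(entities)  # expected to include {"fine_code": "V012", "document_code": "thông tư 02", ...}

history = [{"role": "user", "content": "thông tư 02 quy định gì?",
            "intent": "search_legal", "entities": {}}]
# The pronoun "cái đó" should be replaced by the document code found in the history.
print(resolve_pronouns("nói rõ hơn về cái đó", history))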
backend/hue_portal/chatbot/exact_match_cache.py ADDED
@@ -0,0 +1,61 @@
1
+ """
2
+ Exact match cache for caching repeated chatbot responses.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import copy
7
+ import time
8
+ import unicodedata
9
+ import re
10
+ from collections import OrderedDict
11
+ from typing import Any, Dict, Optional, Tuple
12
+
13
+
14
+ class ExactMatchCache:
15
+ """LRU cache that stores full chatbot responses for exact queries."""
16
+
17
+ def __init__(self, max_size: int = 256, ttl_seconds: Optional[int] = 43200):
18
+ self.max_size = max(1, max_size)
19
+ self.ttl = ttl_seconds
20
+ self._store: "OrderedDict[str, Tuple[float, Dict[str, Any]]]" = OrderedDict()
21
+
22
+ def get(self, query: str, intent: Optional[str] = None) -> Optional[Dict[str, Any]]:
23
+ """Return cached response if still valid."""
24
+ key = self._make_key(query, intent)
25
+ record = self._store.get(key)
26
+ if not record:
27
+ return None
28
+
29
+ timestamp, payload = record
30
+ if self.ttl and (time.time() - timestamp) > self.ttl:
31
+ self._store.pop(key, None)
32
+ return None
33
+
34
+ self._store.move_to_end(key)
35
+ return copy.deepcopy(payload)
36
+
37
+ def set(self, query: str, intent: Optional[str], response: Dict[str, Any]) -> None:
38
+ """Store response for normalized query/int."""
39
+ key = self._make_key(query, intent)
40
+ self._store[key] = (time.time(), copy.deepcopy(response))
41
+ self._store.move_to_end(key)
42
+ if len(self._store) > self.max_size:
43
+ self._store.popitem(last=False)
44
+
45
+ def clear(self) -> None:
46
+ """Remove all cached entries."""
47
+ self._store.clear()
48
+
49
+ def _make_key(self, query: str, intent: Optional[str]) -> str:
50
+ normalized_query = self._normalize_query(query or "")
51
+ normalized_intent = (intent or "").strip().lower()
52
+ return f"{normalized_intent}::{normalized_query}"
53
+
54
+ def _normalize_query(self, query: str) -> str:
55
+ """Normalize query for stable caching."""
56
+ text = query.lower().strip()
57
+ text = unicodedata.normalize("NFD", text)
58
+ text = "".join(ch for ch in text if unicodedata.category(ch) != "Mn")
59
+ text = re.sub(r"\s+", " ", text)
60
+ return text
61
+
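Not part of the commit: a usage sketch of the cache above with hypothetical values.

from hue_portal.chatbot.exact_match_cache import ExactMatchCache

cache = ExactMatchCache(max_size=256, ttl_seconds=43200)
response = {"message": "Phạt 200.000 - 400.000 VNĐ", "intent": "search_fine",
            "results": [], "count": 0}
cache.set("Vượt đèn đỏ phạt bao nhiêu?", "search_fine", response)

# Case, extra whitespace, and intent casing all normalize to the same key, so this is a hit:
hit = cache.get("  VƯỢT ĐÈN ĐỎ   PHẠT BAO NHIÊU?  ", "SEARCH_FINE")
print(hit is not None)  # True; get() returns a deep copy of the stored payload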
backend/hue_portal/chatbot/fast_path_handler.py ADDED
@@ -0,0 +1,59 @@
1
+ """
2
+ Fast Path Handler - Returns cached responses from golden dataset.
3
+ """
4
+ from typing import Dict, Any
5
+ from hue_portal.core.models import GoldenQuery
6
+
7
+
8
+ class FastPathHandler:
9
+ """Handle Fast Path queries using golden dataset."""
10
+
11
+ def handle(self, query: str, golden_query_id: int) -> Dict[str, Any]:
12
+ """
13
+ Get cached response from golden dataset.
14
+
15
+ Args:
16
+ query: User query (for logging).
17
+ golden_query_id: ID of matched golden query.
18
+
19
+ Returns:
20
+ Response dict (same format as Slow Path) with additional metadata.
21
+ """
22
+ try:
23
+ golden_query = GoldenQuery.objects.get(id=golden_query_id, is_active=True)
24
+ except GoldenQuery.DoesNotExist:
25
+ # Fallback: return error response
26
+ return {
27
+ "message": "Xin lỗi, không tìm thấy thông tin trong cơ sở dữ liệu.",
28
+ "intent": "error",
29
+ "results": [],
30
+ "count": 0,
31
+ "_source": "fast_path",
32
+ "_error": "golden_query_not_found"
33
+ }
34
+
35
+ # Increment usage count (synchronous save; could be deferred/async for performance)
36
+ golden_query.usage_count += 1
37
+ golden_query.save(update_fields=['usage_count'])
38
+
39
+ # Return cached response
40
+ response = golden_query.response_data.copy()
41
+
42
+ # Add metadata
43
+ response['_source'] = 'fast_path'
44
+ response['_golden_query_id'] = golden_query_id
45
+ response['_verified_by'] = golden_query.verified_by
46
+ response['_accuracy_score'] = golden_query.accuracy_score
47
+
48
+ # Ensure required fields exist
49
+ if 'message' not in response:
50
+ response['message'] = golden_query.response_message
51
+
52
+ if 'intent' not in response:
53
+ response['intent'] = golden_query.intent
54
+
55
+ if 'count' not in response:
56
+ response['count'] = len(response.get('results', []))
57
+
58
+ return response
59
+
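Not part of the commit: hypothetical glue code wiring a routing decision into the handler above (both modules ship in this commit; the query and intent are illustrative).

from hue_portal.chatbot.dual_path_router import DualPathRouter
from hue_portal.chatbot.fast_path_handler import FastPathHandler

query = "vượt đèn đỏ phạt bao nhiêu"
decision = DualPathRouter().route(query, intent="search_fine", confidence=0.9)
if decision.path == "fast_path" and decision.matched_golden_query_id:
    response = FastPathHandler().handle(query, decision.matched_golden_query_id)
    print(response["_source"], response.get("message"))  # "fast_path" plus the cached answer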
backend/hue_portal/chatbot/legal_guardrails.py ADDED
@@ -0,0 +1,35 @@
1
+ """
2
+ Guardrails RAIL schema and helpers for structured legal answers.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from functools import lru_cache
8
+ from pathlib import Path
9
+ from typing import Dict, Optional
10
+
11
+ from guardrails import Guard
12
+
13
+ SCHEMA_DIR = Path(__file__).resolve().parent / "schemas"
14
+ RAIL_PATH = SCHEMA_DIR / "legal_answer.rail"
15
+
16
+
17
+ @lru_cache(maxsize=1)
18
+ def get_legal_guard() -> Guard:
19
+ """Return cached Guard instance for legal answers."""
20
+
21
+ return Guard.from_rail(rail_file=str(RAIL_PATH))
22
+
23
+
24
+ def ensure_schema_files() -> Optional[Dict[str, str]]:
25
+ """
26
+ Return metadata for the legal RAIL schema to help packaging.
27
+
28
+ Called during setup to make sure the schema file is discovered by tools
29
+ such as setup scripts or bundlers.
30
+ """
31
+
32
+ if RAIL_PATH.exists():
33
+ return {"legal_rail": str(RAIL_PATH)}
34
+ return None
35
+
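Not part of the commit: a sketch of the helpers above. Only get_legal_guard and ensure_schema_files are defined in this module; the Guard object itself, and how it validates LLM output, comes from the guardrails package.

from hue_portal.chatbot.legal_guardrails import ensure_schema_files, get_legal_guard

print(ensure_schema_files())   # {"legal_rail": ".../schemas/legal_answer.rail"} when the RAIL file exists
guard = get_legal_guard()      # built from legal_answer.rail and cached via lru_cache
# guard is then used downstream (see llm_integration.py) to validate structured legal answers.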
backend/hue_portal/chatbot/llm_integration.py ADDED
@@ -0,0 +1,1746 @@
1
+ """
2
+ LLM integration for natural answer generation.
3
+ Supports OpenAI GPT, Anthropic Claude, Ollama, Hugging Face Inference API, Local Hugging Face models, and API mode.
4
+ """
5
+ import os
6
+ import re
7
+ import json
8
+ import sys
9
+ import traceback
10
+ import logging
11
+ import time
12
+ from pathlib import Path
13
+ from typing import List, Dict, Any, Optional, Set, Tuple
14
+
15
+ from .structured_legal import (
16
+ build_structured_legal_prompt,
17
+ get_legal_output_parser,
18
+ parse_structured_output,
19
+ LegalAnswer,
20
+ )
21
+ from .legal_guardrails import get_legal_guard
22
+ try:
23
+ from dotenv import load_dotenv
24
+ load_dotenv()
25
+ except ImportError:
26
+ pass # dotenv is optional
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ BASE_DIR = Path(__file__).resolve().parents[2]
31
+ GUARDRAILS_LOG_DIR = BASE_DIR / "logs" / "guardrails"
32
+ GUARDRAILS_LOG_FILE = GUARDRAILS_LOG_DIR / "legal_structured.log"
33
+
34
+
35
+ def _write_guardrails_debug(label: str, content: Optional[str]) -> None:
36
+ """Persist raw Guardrails inputs/outputs for debugging."""
37
+ if not content:
38
+ return
39
+ try:
40
+ GUARDRAILS_LOG_DIR.mkdir(parents=True, exist_ok=True)
41
+ timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
42
+ snippet = content.strip()
43
+ max_len = 4000
44
+ if len(snippet) > max_len:
45
+ snippet = snippet[:max_len] + "...[truncated]"
46
+ with GUARDRAILS_LOG_FILE.open("a", encoding="utf-8") as fp:
47
+ fp.write(f"[{timestamp}] [{label}] {snippet}\n{'-' * 80}\n")
48
+ except Exception as exc:
49
+ logger.debug("Unable to write guardrails log: %s", exc)
50
+
51
+
52
+ def _collect_doc_metadata(documents: List[Any]) -> Tuple[Set[str], Set[str]]:
53
+ titles: Set[str] = set()
54
+ sections: Set[str] = set()
55
+ for doc in documents:
56
+ document = getattr(doc, "document", None)
57
+ title = getattr(document, "title", None)
58
+ if title:
59
+ titles.add(title.strip())
60
+ section_code = getattr(doc, "section_code", None)
61
+ if section_code:
62
+ sections.add(section_code.strip())
63
+ return titles, sections
64
+
65
+
66
+ def _contains_any(text: str, tokens: Set[str]) -> bool:
67
+ if not tokens:
68
+ return True
69
+ normalized = text.lower()
70
+ return any(token.lower() in normalized for token in tokens if token)
71
+
72
+
73
+ def _validate_structured_answer(
74
+ answer: "LegalAnswer",
75
+ documents: List[Any],
76
+ ) -> Tuple[bool, str]:
77
+ """Ensure structured answer references actual documents/sections."""
78
+ allowed_titles, allowed_sections = _collect_doc_metadata(documents)
79
+ if allowed_titles and not _contains_any(answer.summary, allowed_titles):
80
+ return False, "Summary thiếu tên văn bản từ bảng tham chiếu"
81
+
82
+ for idx, bullet in enumerate(answer.details, 1):
83
+ if allowed_titles and not _contains_any(bullet, allowed_titles):
84
+ return False, f"Chi tiết {idx} thiếu tên văn bản"
85
+ if allowed_sections and not _contains_any(bullet, allowed_sections):
86
+ return False, f"Chi tiết {idx} thiếu mã điều/khoản"
87
+
88
+ allowed_title_lower = {title.lower() for title in allowed_titles}
89
+ allowed_section_lower = {section.lower() for section in allowed_sections}
90
+
91
+ for idx, citation in enumerate(answer.citations, 1):
92
+ if citation.document_title and citation.document_title.lower() not in allowed_title_lower:
93
+ return False, f"Citation {idx} chứa văn bản không có trong nguồn"
94
+ if (
95
+ citation.section_code
96
+ and allowed_section_lower
97
+ and citation.section_code.lower() not in allowed_section_lower
98
+ ):
99
+ return False, f"Citation {idx} chứa điều/khoản không có trong nguồn"
100
+
101
+ return True, ""
102
+
103
+ # Import download progress tracker (optional)
104
+ try:
105
+ from .download_progress import get_progress_tracker, DownloadProgress
106
+ PROGRESS_TRACKER_AVAILABLE = True
107
+ except ImportError:
108
+ PROGRESS_TRACKER_AVAILABLE = False
109
+ logger.warning("Download progress tracker not available")
110
+
111
+ # LLM Provider types
112
+ LLM_PROVIDER_OPENAI = "openai"
113
+ LLM_PROVIDER_ANTHROPIC = "anthropic"
114
+ LLM_PROVIDER_OLLAMA = "ollama"
115
+ LLM_PROVIDER_HUGGINGFACE = "huggingface" # Hugging Face Inference API
116
+ LLM_PROVIDER_LOCAL = "local" # Local Hugging Face Transformers model
117
+ LLM_PROVIDER_LLAMA_CPP = "llama_cpp" # GGUF via llama.cpp
118
+ LLM_PROVIDER_API = "api" # API mode - call HF Spaces API
119
+ LLM_PROVIDER_NONE = "none"
120
+
121
+ # Get provider from environment (default to llama.cpp Gemma if none provided)
122
+ DEFAULT_LLM_PROVIDER = os.environ.get(
123
+ "DEFAULT_LLM_PROVIDER",
124
+ LLM_PROVIDER_LLAMA_CPP,
125
+ ).lower()
126
+ env_provider = os.environ.get("LLM_PROVIDER", "").strip().lower()
127
+ LLM_PROVIDER = env_provider or DEFAULT_LLM_PROVIDER
128
+ LLM_MODE = os.environ.get("LLM_MODE", "answer").strip().lower() or "answer"
129
+ LEGAL_STRUCTURED_MAX_ATTEMPTS = max(
130
+ 1, int(os.environ.get("LEGAL_STRUCTURED_MAX_ATTEMPTS", "2"))
131
+ )
132
+
133
+
134
+ class LLMGenerator:
135
+ """Generate natural language answers using LLMs."""
136
+
137
+ # Class-level cache for llama.cpp model (shared across all instances in same process)
138
+ _llama_cpp_shared = None
139
+ _llama_cpp_model_path_shared = None
140
+
141
+ def __init__(self, provider: Optional[str] = None):
142
+ """
143
+ Initialize LLM generator.
144
+
145
+ Args:
146
+ provider: LLM provider ('openai', 'anthropic', 'ollama', 'local', 'huggingface', 'api', or None for auto-detect).
147
+ """
148
+ self.provider = provider or LLM_PROVIDER
149
+ self.llm_mode = LLM_MODE if LLM_MODE in {"keywords", "answer"} else "answer"
150
+ self.client = None
151
+ self.local_model = None
152
+ self.local_tokenizer = None
153
+ self.llama_cpp = None
154
+ self.llama_cpp_model_path = None
155
+ self.api_base_url = None
156
+ self._initialize_client()
157
+
158
+ def _initialize_client(self):
159
+ """Initialize LLM client based on provider."""
160
+ if self.provider == LLM_PROVIDER_OPENAI:
161
+ try:
162
+ import openai
163
+ api_key = os.environ.get("OPENAI_API_KEY")
164
+ if api_key:
165
+ self.client = openai.OpenAI(api_key=api_key)
166
+ print("✅ OpenAI client initialized")
167
+ else:
168
+ print("⚠️ OPENAI_API_KEY not found, OpenAI disabled")
169
+ except ImportError:
170
+ print("⚠️ openai package not installed, install with: pip install openai")
171
+
172
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
173
+ try:
174
+ import anthropic
175
+ api_key = os.environ.get("ANTHROPIC_API_KEY")
176
+ if api_key:
177
+ self.client = anthropic.Anthropic(api_key=api_key)
178
+ print("✅ Anthropic client initialized")
179
+ else:
180
+ print("⚠️ ANTHROPIC_API_KEY not found, Anthropic disabled")
181
+ except ImportError:
182
+ print("⚠️ anthropic package not installed, install with: pip install anthropic")
183
+
184
+ elif self.provider == LLM_PROVIDER_OLLAMA:
185
+ self.ollama_base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
186
+ self.ollama_model = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
187
+ print(f"✅ Ollama configured (base_url: {self.ollama_base_url}, model: {self.ollama_model})")
188
+
189
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
190
+ self.hf_api_key = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
191
+ self.hf_model = os.environ.get("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
192
+ if self.hf_api_key:
193
+ print(f"✅ Hugging Face API configured (model: {self.hf_model})")
194
+ else:
195
+ print("⚠️ HF_TOKEN not found, Hugging Face may have rate limits")
196
+
197
+ elif self.provider == LLM_PROVIDER_API:
198
+ # API mode - call HF Spaces API
199
+ self.api_base_url = os.environ.get(
200
+ "HF_API_BASE_URL",
201
+ "https://davidtran999-hue-portal-backend.hf.space/api"
202
+ )
203
+ print(f"✅ API mode configured (base_url: {self.api_base_url})")
204
+
205
+ elif self.provider == LLM_PROVIDER_LLAMA_CPP:
206
+ self._initialize_llama_cpp_model()
207
+
208
+ elif self.provider == LLM_PROVIDER_LOCAL:
209
+ self._initialize_local_model()
210
+
211
+ else:
212
+ print("ℹ️ No LLM provider configured, using template-based generation")
213
+
214
+ def _initialize_local_model(self):
215
+ """Initialize local Hugging Face Transformers model."""
216
+ try:
217
+ from transformers import AutoModelForCausalLM, AutoTokenizer
218
+ import torch
219
+
220
+ # Default to Qwen 2.5 7B with 8-bit quantization (fits in GPU RAM)
221
+ model_path = os.environ.get("LOCAL_MODEL_PATH", "Qwen/Qwen2.5-7B-Instruct")
222
+ device = os.environ.get("LOCAL_MODEL_DEVICE", "auto") # auto, cpu, cuda
223
+
224
+ print(f"[LLM] Loading local model: {model_path}", flush=True)
225
+ logger.info(f"[LLM] Loading local model: {model_path}")
226
+
227
+ # Determine device
228
+ if device == "auto":
229
+ device = "cuda" if torch.cuda.is_available() else "cpu"
230
+
231
+ # Start cache monitoring for download progress (optional)
232
+ try:
233
+ from .cache_monitor import get_cache_monitor
234
+ monitor = get_cache_monitor()
235
+ monitor.start_monitoring(model_path, interval=2.0)
236
+ print(f"[LLM] 📊 Started cache monitoring for {model_path}", flush=True)
237
+ logger.info(f"[LLM] 📊 Started cache monitoring for {model_path}")
238
+ except Exception as e:
239
+ logger.warning(f"Could not start cache monitoring: {e}")
240
+
241
+ # Load tokenizer
242
+ print("[LLM] Loading tokenizer...", flush=True)
243
+ logger.info("[LLM] Loading tokenizer...")
244
+ try:
245
+ self.local_tokenizer = AutoTokenizer.from_pretrained(
246
+ model_path,
247
+ trust_remote_code=True
248
+ )
249
+ print("[LLM] ✅ Tokenizer loaded successfully", flush=True)
250
+ logger.info("[LLM] ✅ Tokenizer loaded successfully")
251
+ except Exception as tokenizer_err:
252
+ error_trace = traceback.format_exc()
253
+ print(f"[LLM] ❌ Tokenizer load error: {tokenizer_err}", flush=True)
254
+ print(f"[LLM] ❌ Tokenizer trace: {error_trace}", flush=True)
255
+ logger.error(f"[LLM] ❌ Tokenizer load error: {tokenizer_err}\n{error_trace}")
256
+ print(f"[LLM] ❌ ERROR: {type(tokenizer_err).__name__}: {str(tokenizer_err)}", file=sys.stderr, flush=True)
257
+ traceback.print_exc(file=sys.stderr)
258
+ raise
259
+
260
+ # Load model with optional quantization and fallback mechanism
261
+ print(f"[LLM] Loading model to {device}...", flush=True)
262
+ logger.info(f"[LLM] Loading model to {device}...")
263
+
264
+ # Check for quantization config
265
+ # Default to 8-bit for 7B (better thinking), 4-bit for larger models
266
+ default_8bit = "7b" in model_path.lower() or "7B" in model_path
267
+ default_4bit = ("32b" in model_path.lower() or "32B" in model_path or "14b" in model_path.lower() or "14B" in model_path) and not default_8bit
268
+
269
+ # Check environment variable for explicit quantization preference
270
+ quantization_pref = os.environ.get("LOCAL_MODEL_QUANTIZATION", "").lower()
271
+ if quantization_pref == "4bit":
272
+ use_8bit = False
273
+ use_4bit = True
274
+ elif quantization_pref == "8bit":
275
+ use_8bit = True
276
+ use_4bit = False
277
+ elif quantization_pref == "none":
278
+ use_8bit = False
279
+ use_4bit = False
280
+ else:
281
+ # Use defaults based on model size
282
+ use_8bit = os.environ.get("LOCAL_MODEL_8BIT", "true" if default_8bit else "false").lower() == "true"
283
+ use_4bit = os.environ.get("LOCAL_MODEL_4BIT", "true" if default_4bit else "false").lower() == "true"
284
+
285
+ # Try loading with fallback: 8-bit → 4-bit → float16
286
+ model_loaded = False
287
+ quantization_attempts = []
288
+
289
+ if device == "cuda":
290
+ # Attempt 1: Try 8-bit quantization (if requested)
291
+ if use_8bit:
292
+ quantization_attempts.append(("8-bit", True, False))
293
+
294
+ # Attempt 2: Try 4-bit quantization (if 8-bit fails or not requested)
295
+ if use_4bit or (use_8bit and not model_loaded):
296
+ quantization_attempts.append(("4-bit", False, True))
297
+
298
+ # Attempt 3: Fallback to float16 (no quantization)
299
+ quantization_attempts.append(("float16", False, False))
300
+ else:
301
+ # CPU: only float32
302
+ quantization_attempts.append(("float32", False, False))
303
+
304
+ last_error = None
305
+ for attempt_name, try_8bit, try_4bit in quantization_attempts:
306
+ if model_loaded:
307
+ break
308
+
309
+ try:
310
+ load_kwargs = {
311
+ "trust_remote_code": True,
312
+ "low_cpu_mem_usage": True,
313
+ }
314
+
315
+ if device == "cuda":
316
+ load_kwargs["device_map"] = "auto"
317
+
318
+ if try_4bit:
319
+ # Check if bitsandbytes is available
320
+ try:
321
+ import bitsandbytes as bnb
322
+ from transformers import BitsAndBytesConfig
323
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
324
+ load_in_4bit=True,
325
+ bnb_4bit_compute_dtype=torch.float16
326
+ )
327
+ print(f"[LLM] Attempting to load with 4-bit quantization (~4-5GB VRAM for 7B)", flush=True)
328
+ except ImportError:
329
+ print(f"[LLM] ⚠️ bitsandbytes not available, skipping 4-bit quantization", flush=True)
330
+ raise ImportError("bitsandbytes not available")
331
+ elif try_8bit:
332
+ from transformers import BitsAndBytesConfig
333
+ # Fixed: Remove CPU offload to avoid Int8Params compatibility issue
334
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
335
+ load_in_8bit=True,
336
+ llm_int8_threshold=6.0
337
+ # Removed: llm_int8_enable_fp32_cpu_offload=True (causes compatibility issues)
338
+ )
339
+ # Removed: max_memory override - let accelerate handle it automatically
340
+ print(f"[LLM] Attempting to load with 8-bit quantization (~7GB VRAM for 7B)", flush=True)
341
+ else:
342
+ load_kwargs["torch_dtype"] = torch.float16
343
+ print(f"[LLM] Attempting to load with float16 (no quantization)", flush=True)
344
+ else:
345
+ load_kwargs["torch_dtype"] = torch.float32
346
+ print(f"[LLM] Attempting to load with float32 (CPU)", flush=True)
347
+
348
+ # Load model
349
+ self.local_model = AutoModelForCausalLM.from_pretrained(
350
+ model_path,
351
+ **load_kwargs
352
+ )
353
+
354
+ # Stop cache monitoring (download complete)
355
+ try:
356
+ from .cache_monitor import get_cache_monitor
357
+ monitor = get_cache_monitor()
358
+ monitor.stop_monitoring(model_path)
359
+ print(f"[LLM] ✅ Model download complete, stopped monitoring", flush=True)
360
+ except:
361
+ pass
362
+
363
+ print(f"[LLM] ✅ Model loaded successfully with {attempt_name} quantization", flush=True)
364
+ logger.info(f"[LLM] ✅ Model loaded successfully with {attempt_name} quantization")
365
+
366
+ # Optional: Compile model for faster inference (PyTorch 2.0+)
367
+ try:
368
+ if hasattr(torch, "compile") and device == "cuda":
369
+ print(f"[LLM] ⚡ Compiling model for faster inference...", flush=True)
370
+ self.local_model = torch.compile(self.local_model, mode="reduce-overhead")
371
+ print(f"[LLM] ✅ Model compiled successfully", flush=True)
372
+ logger.info(f"[LLM] ✅ Model compiled for faster inference")
373
+ except Exception as compile_err:
374
+ print(f"[LLM] ⚠️ Model compilation skipped: {compile_err}", flush=True)
375
+ # Continue without compilation
376
+
377
+ model_loaded = True
378
+
379
+ except Exception as model_load_err:
380
+ last_error = model_load_err
381
+ error_trace = traceback.format_exc()
382
+ print(f"[LLM] ⚠️ Failed to load with {attempt_name}: {model_load_err}", flush=True)
383
+ logger.warning(f"[LLM] ⚠️ Failed to load with {attempt_name}: {model_load_err}")
384
+
385
+ # If this was the last attempt, raise the error
386
+ if attempt_name == quantization_attempts[-1][0]:
387
+ print(f"[LLM] ❌ All quantization attempts failed. Last error: {model_load_err}", flush=True)
388
+ print(f"[LLM] ❌ Model load trace: {error_trace}", flush=True)
389
+ logger.error(f"[LLM] ❌ Model load error: {model_load_err}\n{error_trace}")
390
+ print(f"[LLM] ❌ ERROR: {type(model_load_err).__name__}: {str(model_load_err)}", file=sys.stderr, flush=True)
391
+ traceback.print_exc(file=sys.stderr)
392
+ raise
393
+ else:
394
+ # Try next quantization method
395
+ print(f"[LLM] 🔄 Falling back to next quantization method...", flush=True)
396
+ continue
397
+
398
+ if not model_loaded:
399
+ raise RuntimeError("Failed to load model with any quantization method")
400
+
401
+ if device == "cpu":
402
+ try:
403
+ self.local_model = self.local_model.to(device)
404
+ print(f"[LLM] ✅ Model moved to {device}", flush=True)
405
+ logger.info(f"[LLM] ✅ Model moved to {device}")
406
+ except Exception as move_err:
407
+ error_trace = traceback.format_exc()
408
+ print(f"[LLM] ❌ Model move error: {move_err}", flush=True)
409
+ logger.error(f"[LLM] ❌ Model move error: {move_err}\n{error_trace}")
410
+ print(f"[LLM] ❌ ERROR: {type(move_err).__name__}: {str(move_err)}", file=sys.stderr, flush=True)
411
+ traceback.print_exc(file=sys.stderr)
412
+
413
+ self.local_model.eval() # Set to evaluation mode
414
+ print(f"[LLM] ✅ Local model loaded successfully on {device}", flush=True)
415
+ logger.info(f"[LLM] ✅ Local model loaded successfully on {device}")
416
+
417
+ except ImportError as import_err:
418
+ error_msg = "transformers package not installed, install with: pip install transformers torch"
419
+ print(f"[LLM] ⚠️ {error_msg}", flush=True)
420
+ logger.warning(f"[LLM] ⚠️ {error_msg}")
421
+ print(f"[LLM] ❌ ImportError: {import_err}", file=sys.stderr, flush=True)
422
+ self.local_model = None
423
+ self.local_tokenizer = None
424
+ except Exception as e:
425
+ error_trace = traceback.format_exc()
426
+ print(f"[LLM] ❌ Error loading local model: {e}", flush=True)
427
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
428
+ logger.error(f"[LLM] ❌ Error loading local model: {e}\n{error_trace}")
429
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
430
+ traceback.print_exc(file=sys.stderr)
431
+ print("[LLM] 💡 Tip: Use smaller models like Qwen/Qwen2.5-1.5B-Instruct or Qwen/Qwen2.5-0.5B-Instruct", flush=True)
432
+ self.local_model = None
433
+ self.local_tokenizer = None
434
+
435
+ def _initialize_llama_cpp_model(self) -> None:
436
+ """Initialize llama.cpp runtime for GGUF inference."""
437
+ # Use shared model if available (singleton pattern for process-level reuse)
438
+ if LLMGenerator._llama_cpp_shared is not None:
439
+ self.llama_cpp = LLMGenerator._llama_cpp_shared
440
+ self.llama_cpp_model_path = LLMGenerator._llama_cpp_model_path_shared
441
+ print("[LLM] ♻️ Reusing shared llama.cpp model (kept alive)", flush=True)
442
+ logger.debug("[LLM] Reusing shared llama.cpp model (kept alive)")
443
+ return
444
+
445
+ # Skip if instance model already loaded
446
+ if self.llama_cpp is not None:
447
+ print("[LLM] ♻️ llama.cpp model already loaded, skipping re-initialization", flush=True)
448
+ logger.debug("[LLM] llama.cpp model already loaded, skipping re-initialization")
449
+ return
450
+
451
+ try:
452
+ from llama_cpp import Llama
453
+ except ImportError:
454
+ print("⚠️ llama-cpp-python not installed. Run: pip install llama-cpp-python", flush=True)
455
+ logger.warning("llama-cpp-python not installed")
456
+ return
457
+
458
+ model_path = os.environ.get(
459
+ "LLAMA_CPP_MODEL_PATH",
460
+ # Default to the local GGUF file under backend/models
461
+ str(BASE_DIR / "models" / "gemma-2b-it-Q5_K_M.gguf"),
462
+ )
463
+ resolved_path = self._resolve_llama_cpp_model_path(model_path)
464
+ if not resolved_path:
465
+ print("❌ Unable to resolve GGUF model path for llama.cpp", flush=True)
466
+ logger.error("Unable to resolve GGUF model path for llama.cpp")
467
+ return
468
+
469
+ # CPU-friendly defaults: smaller context/batch to reduce latency/RAM
470
+ n_ctx = int(os.environ.get("LLAMA_CPP_CONTEXT", "8192"))
471
+ n_threads = int(os.environ.get("LLAMA_CPP_THREADS", "4"))
472
+ n_batch = int(os.environ.get("LLAMA_CPP_BATCH", "1024"))
473
+ n_gpu_layers = int(os.environ.get("LLAMA_CPP_GPU_LAYERS", "0"))
474
+ use_mmap = os.environ.get("LLAMA_CPP_USE_MMAP", "true").lower() == "true"
475
+ use_mlock = os.environ.get("LLAMA_CPP_USE_MLOCK", "true").lower() == "true"
476
+ rope_freq_base = os.environ.get("LLAMA_CPP_ROPE_FREQ_BASE")
477
+ rope_freq_scale = os.environ.get("LLAMA_CPP_ROPE_FREQ_SCALE")
478
+
479
+ llama_kwargs = {
480
+ "model_path": resolved_path,
481
+ "n_ctx": n_ctx,
482
+ "n_batch": n_batch,
483
+ "n_threads": n_threads,
484
+ "n_gpu_layers": n_gpu_layers,
485
+ "use_mmap": use_mmap,
486
+ "use_mlock": use_mlock,
487
+ "logits_all": False,
488
+ }
489
+ if rope_freq_base and rope_freq_scale:
490
+ try:
491
+ llama_kwargs["rope_freq_base"] = float(rope_freq_base)
492
+ llama_kwargs["rope_freq_scale"] = float(rope_freq_scale)
493
+ except ValueError:
494
+ logger.warning("Invalid rope frequency overrides, ignoring custom values.")
495
+
496
+ try:
497
+ print(f"[LLM] Loading llama.cpp model: {resolved_path}", flush=True)
498
+ logger.info("[LLM] Loading llama.cpp model from %s", resolved_path)
499
+ self.llama_cpp = Llama(**llama_kwargs)
500
+ self.llama_cpp_model_path = resolved_path
501
+ # Store in shared cache for reuse across instances
502
+ LLMGenerator._llama_cpp_shared = self.llama_cpp
503
+ LLMGenerator._llama_cpp_model_path_shared = resolved_path
504
+ print(
505
+ f"[LLM] ✅ llama.cpp ready (ctx={n_ctx}, threads={n_threads}, batch={n_batch}) - Model cached for reuse",
506
+ flush=True,
507
+ )
508
+ logger.info(
509
+ "[LLM] ✅ llama.cpp ready (ctx=%s, threads=%s, batch=%s)",
510
+ n_ctx,
511
+ n_threads,
512
+ n_batch,
513
+ )
514
+ except Exception as exc:
515
+ error_trace = traceback.format_exc()
516
+ print(f"[LLM] ❌ Failed to load llama.cpp model: {exc}", flush=True)
517
+ print(f"[LLM] ❌ Trace: {error_trace}", flush=True)
518
+ logger.error("Failed to load llama.cpp model: %s\n%s", exc, error_trace)
519
+ self.llama_cpp = None
520
+
521
+ def _resolve_llama_cpp_model_path(self, configured_path: str) -> Optional[str]:
522
+ """Resolve GGUF model path, downloading from Hugging Face if needed."""
523
+ potential_path = Path(configured_path)
524
+ if potential_path.is_file():
525
+ logger.info(f"[LLM] Using existing model file: {potential_path}")
526
+ return str(potential_path)
527
+
528
+ repo_id = os.environ.get(
529
+ "LLAMA_CPP_MODEL_REPO",
530
+ "QuantFactory/gemma-2-2b-it-GGUF",
531
+ )
532
+ filename = os.environ.get(
533
+ "LLAMA_CPP_MODEL_FILE",
534
+ "gemma-2-2b-it-Q5_K_M.gguf",
535
+ )
536
+ cache_dir = Path(os.environ.get("LLAMA_CPP_CACHE_DIR", BASE_DIR / "models"))
537
+ cache_dir.mkdir(parents=True, exist_ok=True)
538
+
539
+ # Check if file already exists in cache_dir (avoid re-downloading)
540
+ cached_file = cache_dir / filename
541
+ if cached_file.is_file():
542
+ logger.info(f"[LLM] Using cached model file: {cached_file}")
543
+ print(f"[LLM] ✅ Found cached model: {cached_file}", flush=True)
544
+ return str(cached_file)
545
+
546
+ try:
547
+ from huggingface_hub import hf_hub_download
548
+ except ImportError:
549
+ print("⚠️ huggingface_hub not installed. Run: pip install huggingface_hub", flush=True)
550
+ logger.warning("huggingface_hub not installed")
551
+ return None
552
+
553
+ try:
554
+ print(f"[LLM] Downloading model from Hugging Face: {repo_id}/{filename}", flush=True)
555
+ logger.info(f"[LLM] Downloading model from Hugging Face: {repo_id}/{filename}")
556
+ # hf_hub_download has built-in caching - won't re-download if file exists in HF cache
557
+ downloaded_path = hf_hub_download(
558
+ repo_id=repo_id,
559
+ filename=filename,
560
+ local_dir=str(cache_dir),
561
+ local_dir_use_symlinks=False,
562
+ # Force download only if file doesn't exist (hf_hub_download checks cache automatically)
563
+ )
564
+ print(f"[LLM] ✅ Model downloaded/cached: {downloaded_path}", flush=True)
565
+ logger.info(f"[LLM] ✅ Model downloaded/cached: {downloaded_path}")
566
+ return downloaded_path
567
+ except Exception as exc:
568
+ error_trace = traceback.format_exc()
569
+ print(f"[LLM] ❌ Failed to download GGUF model: {exc}", flush=True)
570
+ print(f"[LLM] ❌ Trace: {error_trace}", flush=True)
571
+ logger.error("Failed to download GGUF model: %s\n%s", exc, error_trace)
572
+ return None
573
+
574
+ def is_available(self) -> bool:
575
+ """Check if LLM is available."""
576
+ return (
577
+ self.client is not None
578
+ or self.provider == LLM_PROVIDER_OLLAMA
579
+ or self.provider == LLM_PROVIDER_HUGGINGFACE
580
+ or self.provider == LLM_PROVIDER_API
581
+ or (self.provider == LLM_PROVIDER_LOCAL and self.local_model is not None)
582
+ or (self.provider == LLM_PROVIDER_LLAMA_CPP and self.llama_cpp is not None)
583
+ )
584
+
585
+ def generate_answer(
586
+ self,
587
+ query: str,
588
+ context: Optional[List[Dict[str, Any]]] = None,
589
+ documents: Optional[List[Any]] = None
590
+ ) -> Optional[str]:
591
+ """
592
+ Generate natural language answer from documents.
593
+
594
+ Args:
595
+ query: User query.
596
+ context: Optional conversation context.
597
+ documents: Retrieved documents.
598
+
599
+ Returns:
600
+ Generated answer or None if LLM not available.
601
+ """
602
+ if not self.is_available():
603
+ return None
604
+
605
+ prompt = self._build_prompt(query, context, documents)
606
+ return self._generate_from_prompt(prompt, context=context)
607
+
608
+ def _build_prompt(
609
+ self,
610
+ query: str,
611
+ context: Optional[List[Dict[str, Any]]],
612
+ documents: Optional[List[Any]]
613
+ ) -> str:
614
+ """Build prompt for LLM."""
615
+ prompt_parts = [
616
+ "Bạn là chuyên gia tư vấn về xử lí kỷ luật cán bộ đảng viên của Phòng Thanh Tra - Công An Thành Phố Huế.",
617
+ "Nhiệm vụ: Trả lời câu hỏi của người dùng dựa trên các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên được cung cấp.",
618
+ "",
619
+ f"Câu hỏi của người dùng: {query}",
620
+ ""
621
+ ]
622
+
623
+ if context:
624
+ prompt_parts.append("Ngữ cảnh cuộc hội thoại trước đó:")
625
+ for msg in context[-3:]: # Last 3 messages
626
+ role = "Người dùng" if msg.get("role") == "user" else "Bot"
627
+ content = msg.get("content", "")
628
+ prompt_parts.append(f"{role}: {content}")
629
+ prompt_parts.append("")
630
+
631
+ if documents:
632
+ prompt_parts.append("Các văn bản/quy định liên quan:")
633
+ # 4 chunks for good context and speed balance
634
+ for i, doc in enumerate(documents[:4], 1):
635
+ # Extract relevant fields based on document type
636
+ doc_text = self._format_document(doc)
637
+ prompt_parts.append(f"{i}. {doc_text}")
638
+ prompt_parts.append("")
639
+ # If documents exist, require strict adherence
640
+ prompt_parts.extend([
641
+ "Yêu cầu QUAN TRỌNG:",
642
+ "- CHỈ trả lời dựa trên thông tin trong 'Các văn bản/quy định liên quan' ở trên",
643
+ "- KHÔNG được tự tạo hoặc suy đoán thông tin không có trong tài liệu",
644
+ "- Khi đã có trích đoạn, phải tổng hợp theo cấu trúc rõ ràng:\n 1) Tóm tắt ngắn gọn nội dung chính\n 2) Liệt kê từng điều/khoản hoặc hình thức xử lý (dùng bullet/đánh số, ghi rõ Điều, Khoản, trang, tên văn bản)\n 3) Kết luận + khuyến nghị áp dụng.",
645
+ "- Luôn nhắc tên văn bản (ví dụ: Quyết định 69/QĐ-TW) và mã điều trong nội dung trả lời.",
646
+ "- Kết thúc phần trả lời bằng câu: '(Xem trích dẫn chi tiết bên dưới)'.",
647
+ "- Không dùng những câu chung chung như 'Rất tiếc' hay 'Tôi không thể giúp', hãy trả lời thẳng vào câu hỏi.",
648
+ "- Chỉ khi HOÀN TOÀN không có thông tin trong tài liệu mới được nói: 'Thông tin trong cơ sở dữ liệu chưa đủ để trả lời câu hỏi này'",
649
+ "- Nếu có mức phạt, phải ghi rõ số tiền (ví dụ: 200.000 - 400.000 VNĐ)",
650
+ "- Nếu có điều khoản, ghi rõ mã điều (ví dụ: Điều 5, Điều 10)",
651
+ "- Nếu có thủ tục, ghi rõ hồ sơ, lệ phí, thời hạn",
652
+ "- Trả lời bằng tiếng Việt, ngắn gọn, dễ hiểu",
653
+ "",
654
+ "Trả lời:"
655
+ ])
656
+ else:
657
+ # No documents - allow general conversation
658
+ prompt_parts.extend([
659
+ "Yêu cầu:",
660
+ "- Trả lời câu hỏi một cách tự nhiên và hữu ích như một chatbot AI thông thường.",
661
+ "- Phản hồi phải có ít nhất 2 đoạn (mỗi đoạn ≥ 2 câu) và tổng cộng ≥ 6 câu.",
662
+ "- Luôn có ít nhất 1 danh sách bullet hoặc đánh số để người dùng dễ làm theo.",
663
+ "- Với chủ đề đời sống (ẩm thực, sức khỏe, du lịch, công nghệ...), hãy đưa ra gợi ý thật đầy đủ, gồm tối thiểu 4-6 câu hoặc 2 đoạn nội dung.",
664
+ "- Nếu câu hỏi cần công thức/nấu ăn: liệt kê NGUYÊN LIỆU rõ ràng (dạng bullet) và CÁC BƯỚC chi tiết (đánh số 1,2,3...). Đề xuất thêm mẹo hoặc biến tấu phù hợp.",
665
+ "- Với các chủ đề mẹo vặt khác, hãy chia nhỏ câu trả lời thành từng phần (Ví dụ: Bối cảnh → Các bước → Lưu ý).",
666
+ "- Tuyệt đối không mở đầu bằng lời xin lỗi hoặc từ chối; hãy đi thẳng vào nội dung chính.",
667
+ "- Nếu câu hỏi liên quan đến pháp luật, thủ tục, mức phạt nhưng không có thông tin trong cơ sở dữ liệu, hãy nói: 'Tôi không tìm thấy thông tin này trong cơ sở dữ liệu. Bạn có thể liên hệ trực tiếp với Công an thành phố Huế để được tư vấn chi tiết hơn.'",
668
+ "- Giữ giọng điệu thân thiện, khích lệ, giống một người bạn hiểu biết.",
669
+ "- Trả lời bằng tiếng Việt, mạch lạc, dễ hiểu, ưu tiên trình bày có tiêu đề/phân đoạn để người đọc dễ làm theo.",
670
+ "",
671
+ "Trả lời:"
672
+ ])
673
+
674
+ return "\n".join(prompt_parts)
675
+
676
+ def _generate_from_prompt(
677
+ self,
678
+ prompt: str,
679
+ context: Optional[List[Dict[str, Any]]] = None,
680
+ llm_mode: Optional[str] = None,
681
+ ) -> Optional[str]:
682
+ """Run current provider with a fully formatted prompt."""
683
+ mode = (llm_mode or self.llm_mode or "answer").strip().lower()
684
+ if mode not in {"keywords", "answer"}:
685
+ mode = "answer"
686
+ if not self.is_available():
687
+ return None
688
+
689
+ try:
690
+ print(f"[LLM] Generating answer with provider: {self.provider}", flush=True)
691
+ logger.info(f"[LLM] Generating answer with provider: {self.provider}")
692
+
693
+ if self.provider == LLM_PROVIDER_OPENAI:
694
+ result = self._generate_openai(prompt)
695
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
696
+ result = self._generate_anthropic(prompt)
697
+ elif self.provider == LLM_PROVIDER_OLLAMA:
698
+ result = self._generate_ollama(prompt)
699
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
700
+ result = self._generate_huggingface(prompt, mode)
701
+ elif self.provider == LLM_PROVIDER_LOCAL:
702
+ result = self._generate_local(prompt, mode)
703
+ elif self.provider == LLM_PROVIDER_LLAMA_CPP:
704
+ result = self._generate_llama_cpp(prompt, mode)
705
+ elif self.provider == LLM_PROVIDER_API:
706
+ result = self._generate_api(prompt, context)
707
+ else:
708
+ result = None
709
+
710
+ if result:
711
+ print(
712
+ f"[LLM] ✅ Answer generated successfully (length: {len(result)})",
713
+ flush=True,
714
+ )
715
+ logger.info(
716
+ f"[LLM] ✅ Answer generated successfully (length: {len(result)})"
717
+ )
718
+ else:
719
+ print(f"[LLM] ⚠️ No answer generated", flush=True)
720
+ logger.warning("[LLM] ⚠️ No answer generated")
721
+
722
+ return result
723
+ except Exception as exc:
724
+ error_trace = traceback.format_exc()
725
+ print(f"[LLM] ❌ Error generating answer: {exc}", flush=True)
726
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
727
+ logger.error(f"[LLM] ❌ Error generating answer: {exc}\n{error_trace}")
728
+ print(
729
+ f"[LLM] ❌ ERROR: {type(exc).__name__}: {str(exc)}",
730
+ file=sys.stderr,
731
+ flush=True,
732
+ )
733
+ traceback.print_exc(file=sys.stderr)
734
+ return None
735
+
736
+ def suggest_clarification_topics(
737
+ self,
738
+ query: str,
739
+ candidates: List[Dict[str, Any]],
740
+ max_options: int = 3,
741
+ ) -> Optional[Dict[str, Any]]:
742
+ """
743
+ Ask the LLM to propose clarification options based on candidate documents.
744
+ """
745
+ if not candidates or not self.is_available():
746
+ return None
747
+
748
+ candidate_lines = []
749
+ for idx, candidate in enumerate(candidates[: max_options + 2], 1):
750
+ title = candidate.get("title") or candidate.get("code") or "Văn bản"
751
+ summary = candidate.get("summary") or candidate.get("section_title") or ""
752
+ doc_type = candidate.get("doc_type") or ""
753
+ candidate_lines.append(
754
+ f"{idx}. {candidate.get('code', '').upper()} – {title}\n"
755
+ f" Loại: {doc_type or 'không rõ'}; Tóm tắt: {summary[:200] or 'Không có'}"
756
+ )
757
+
758
+ prompt = (
759
+ "Bạn là trợ lý pháp luật. Người dùng vừa hỏi:\n"
760
+ f"\"{query.strip()}\"\n\n"
761
+ "Đây là các văn bản ứng viên có thể liên quan:\n"
762
+ f"{os.linesep.join(candidate_lines)}\n\n"
763
+ "Hãy chọn tối đa {max_options} văn bản quan trọng cần người dùng xác nhận để tôi tra cứu chính xác.\n"
764
+ "Yêu cầu trả về JSON với dạng:\n"
765
+ "{\n"
766
+ ' "message": "Câu nhắc người dùng bằng tiếng Việt",\n'
767
+ ' "options": [\n'
768
+ ' {"code": "MÃ VĂN BẢN", "title": "Tên văn bản", "reason": "Lý do gợi ý"},\n'
769
+ " ...\n"
770
+ " ]\n"
771
+ "}\n"
772
+ "Chỉ in JSON, không thêm lời giải thích khác."
773
+ ).format(max_options=max_options)
774
+
775
+ raw = self._generate_from_prompt(prompt, llm_mode="keywords")
776
+ if not raw:
777
+ return None
778
+
779
+ parsed = self._extract_json_payload(raw)
780
+ if not parsed:
781
+ return None
782
+
783
+ options = parsed.get("options") or []
784
+ sanitized_options = []
785
+ for option in options:
786
+ code = (option.get("code") or "").strip()
787
+ title = (option.get("title") or "").strip()
788
+ if not code or not title:
789
+ continue
790
+ sanitized_options.append(
791
+ {
792
+ "code": code.upper(),
793
+ "title": title,
794
+ "reason": (option.get("reason") or "").strip(),
795
+ }
796
+ )
797
+ if len(sanitized_options) >= max_options:
798
+ break
799
+
800
+ if not sanitized_options:
801
+ return None
802
+
803
+ message = (parsed.get("message") or "Tôi cần bạn chọn văn bản muốn tra cứu chi tiết hơn.").strip()
804
+ return {"message": message, "options": sanitized_options}
805
+
806
+ def suggest_topic_options(
807
+ self,
808
+ query: str,
809
+ document_code: str,
810
+ document_title: str,
811
+ search_results: List[Dict[str, Any]],
812
+ conversation_context: Optional[List[Dict[str, str]]] = None,
813
+ max_options: int = 3,
814
+ ) -> Optional[Dict[str, Any]]:
815
+ """
816
+ Ask the LLM to propose topic/section options within a selected document.
817
+
818
+ Args:
819
+ query: Original user query
820
+ document_code: Selected document code
821
+ document_title: Selected document title
822
+ search_results: Pre-searched sections from the document
823
+ conversation_context: Recent conversation history
824
+ max_options: Maximum number of options to return
825
+
826
+ Returns:
827
+ Dict with message, options, and search_keywords
828
+ """
829
+ if not self.is_available():
830
+ return None
831
+
832
+ # Build context summary
833
+ context_summary = ""
834
+ if conversation_context:
835
+ recent_messages = conversation_context[-3:] # Last 3 messages
836
+ context_summary = "\n".join([
837
+ f"{msg.get('role', 'user')}: {msg.get('content', '')[:100]}"
838
+ for msg in recent_messages
839
+ ])
840
+
841
+ # Format search results as candidates
842
+ candidate_lines = []
843
+ for idx, result in enumerate(search_results[:max_options + 2], 1):
844
+ section_title = result.get("section_title") or result.get("title") or ""
845
+ article = result.get("article") or result.get("article_number") or ""
846
+ excerpt = result.get("excerpt") or result.get("body") or ""
847
+ if excerpt:
848
+ excerpt = excerpt[:150] + "..." if len(excerpt) > 150 else excerpt
849
+
850
+ candidate_lines.append(
851
+ f"{idx}. {section_title or article or 'Điều khoản'}\n"
852
+ f" {'Điều: ' + article if article else ''}\n"
853
+ f" Nội dung: {excerpt[:200] or 'Không có'}"
854
+ )
855
+
856
+ prompt = (
857
+ "Bạn là trợ lý pháp luật. Người dùng đã chọn văn bản:\n"
858
+ f"- Mã: {document_code}\n"
859
+ f"- Tên: {document_title}\n\n"
860
+ f"Câu hỏi ban đầu của người dùng: \"{query.strip()}\"\n\n"
861
+ )
862
+
863
+ if context_summary:
864
+ prompt += (
865
+ f"Lịch sử hội thoại gần đây:\n{context_summary}\n\n"
866
+ )
867
+
868
+ prompt += (
869
+ "Đây là các điều khoản/chủ đề trong văn bản có thể liên quan:\n"
870
+ f"{os.linesep.join(candidate_lines)}\n\n"
871
+ f"Hãy chọn tối đa {max_options} chủ đề/điều khoản quan trọng nhất cần người dùng xác nhận.\n"
872
+ "Yêu cầu trả về JSON với dạng:\n"
873
+ "{\n"
874
+ ' "message": "Câu nhắc người dùng bằng tiếng Việt",\n'
875
+ ' "options": [\n'
876
+ ' {"title": "Tên chủ đề/điều khoản", "article": "Điều X", "reason": "Lý do gợi ý", "keywords": ["từ", "khóa", "tìm", "kiếm"]},\n'
877
+ " ...\n"
878
+ " ],\n"
879
+ ' "search_keywords": ["từ", "khóa", "chính", "để", "tìm", "kiếm"]\n'
880
+ "}\n"
881
+ "Trong đó:\n"
882
+ "- options: Danh sách chủ đề/điều khoản để người dùng chọn\n"
883
+ "- search_keywords: Danh sách từ khóa quan trọng để tìm kiếm thông tin liên quan\n"
884
+ "- Mỗi option nên có keywords riêng để tìm kiếm chính xác hơn\n"
885
+ "Chỉ in JSON, không thêm lời giải thích khác."
886
+ )
887
+
888
+ raw = self._generate_from_prompt(prompt, llm_mode="keywords")
889
+ if not raw:
890
+ return None
891
+
892
+ parsed = self._extract_json_payload(raw)
893
+ if not parsed:
894
+ return None
895
+
896
+ options = parsed.get("options") or []
897
+ sanitized_options = []
898
+ for option in options:
899
+ title = (option.get("title") or "").strip()
900
+ if not title:
901
+ continue
902
+
903
+ sanitized_options.append({
904
+ "title": title,
905
+ "article": (option.get("article") or "").strip(),
906
+ "reason": (option.get("reason") or "").strip(),
907
+ "keywords": option.get("keywords") or [],
908
+ })
909
+ if len(sanitized_options) >= max_options:
910
+ break
911
+
912
+ if not sanitized_options:
913
+ return None
914
+
915
+ message = (parsed.get("message") or f"Bạn muốn tìm điều khoản/chủ đề nào cụ thể trong {document_title}?").strip()
916
+ search_keywords = parsed.get("search_keywords") or []
917
+
918
+ return {
919
+ "message": message,
920
+ "options": sanitized_options,
921
+ "search_keywords": search_keywords,
922
+ }
923
+
924
+ def suggest_detail_options(
925
+ self,
926
+ query: str,
927
+ selected_document_code: str,
928
+ selected_topic: str,
929
+ conversation_context: Optional[List[Dict[str, str]]] = None,
930
+ max_options: int = 3,
931
+ ) -> Optional[Dict[str, Any]]:
932
+ """
933
+ Ask the LLM to propose detail options for further clarification.
934
+
935
+ Args:
936
+ query: Original user query
937
+ selected_document_code: Selected document code
938
+ selected_topic: Selected topic/section
939
+ conversation_context: Recent conversation history
940
+ max_options: Maximum number of options to return
941
+
942
+ Returns:
943
+ Dict with message, options, and search_keywords
944
+ """
945
+ if not self.is_available():
946
+ return None
947
+
948
+ # Build context summary
949
+ context_summary = ""
950
+ if conversation_context:
951
+ recent_messages = conversation_context[-5:] # Last 5 messages
952
+ context_summary = "\n".join([
953
+ f"{msg.get('role', 'user')}: {msg.get('content', '')[:100]}"
954
+ for msg in recent_messages
955
+ ])
956
+
957
+ prompt = (
958
+ "Bạn là trợ lý pháp luật. Người dùng đã:\n"
959
+ f"1. Chọn văn bản: {selected_document_code}\n"
960
+ f"2. Chọn chủ đề: {selected_topic}\n\n"
961
+ f"Câu hỏi ban đầu: \"{query.strip()}\"\n\n"
962
+ )
963
+
964
+ if context_summary:
965
+ prompt += (
966
+ f"Lịch sử hội thoại:\n{context_summary}\n\n"
967
+ )
968
+
969
+ prompt += (
970
+ "Người dùng muốn biết thêm chi tiết về chủ đề này.\n"
971
+ f"Hãy đề xuất tối đa {max_options} khía cạnh/chi tiết cụ thể mà người dùng có thể muốn biết.\n"
972
+ "Yêu cầu trả về JSON với dạng:\n"
973
+ "{\n"
974
+ ' "message": "Câu hỏi xác nhận bằng tiếng Việt",\n'
975
+ ' "options": [\n'
976
+ ' {"title": "Khía cạnh/chi tiết", "reason": "Lý do gợi ý", "keywords": ["từ", "khóa"]},\n'
977
+ " ...\n"
978
+ " ],\n"
979
+ ' "search_keywords": ["từ", "khóa", "tìm", "kiếm"]\n'
980
+ "}\n"
981
+ "Chỉ in JSON, không thêm lời giải thích khác."
982
+ )
983
+
984
+ raw = self._generate_from_prompt(prompt, llm_mode="keywords")
985
+ if not raw:
986
+ return None
987
+
988
+ parsed = self._extract_json_payload(raw)
989
+ if not parsed:
990
+ return None
991
+
992
+ options = parsed.get("options") or []
993
+ sanitized_options = []
994
+ for option in options:
995
+ title = (option.get("title") or "").strip()
996
+ if not title:
997
+ continue
998
+
999
+ sanitized_options.append({
1000
+ "title": title,
1001
+ "reason": (option.get("reason") or "").strip(),
1002
+ "keywords": option.get("keywords") or [],
1003
+ })
1004
+ if len(sanitized_options) >= max_options:
1005
+ break
1006
+
1007
+ if not sanitized_options:
1008
+ return None
1009
+
1010
+ message = (parsed.get("message") or "Bạn muốn chi tiết gì cho chủ đề này nữa không?").strip()
1011
+ search_keywords = parsed.get("search_keywords") or []
1012
+
1013
+ return {
1014
+ "message": message,
1015
+ "options": sanitized_options,
1016
+ "search_keywords": search_keywords,
1017
+ }
1018
+
1019
+ def extract_search_keywords(
1020
+ self,
1021
+ query: str,
1022
+ selected_options: Optional[List[Dict[str, Any]]] = None,
1023
+ conversation_context: Optional[List[Dict[str, str]]] = None,
1024
+ ) -> List[str]:
1025
+ """
1026
+ Intelligently extract search keywords from query, selected options, and context.
1027
+
1028
+ Args:
1029
+ query: Original user query
1030
+ selected_options: List of selected options (document, topic, etc.)
1031
+ conversation_context: Recent conversation history
1032
+
1033
+ Returns:
1034
+ List of extracted keywords for search optimization
1035
+ """
1036
+ if not self.is_available():
1037
+ # Fallback to simple keyword extraction
1038
+ return self._fallback_keyword_extraction(query)
1039
+
1040
+ # Build context
1041
+ context_text = query
1042
+ if selected_options:
1043
+ for opt in selected_options:
1044
+ title = opt.get("title") or opt.get("code") or ""
1045
+ reason = opt.get("reason") or ""
1046
+ keywords = opt.get("keywords") or []
1047
+ if title:
1048
+ context_text += f" {title}"
1049
+ if reason:
1050
+ context_text += f" {reason}"
1051
+ if keywords:
1052
+ context_text += f" {' '.join(keywords)}"
1053
+
1054
+ if conversation_context:
1055
+ recent_user_messages = [
1056
+ msg.get("content", "")
1057
+ for msg in conversation_context[-3:]
1058
+ if msg.get("role") == "user"
1059
+ ]
1060
+ context_text += " " + " ".join(recent_user_messages)
1061
+
1062
+ prompt = (
1063
+ "Bạn là trợ lý pháp luật. Tôi cần bạn trích xuất các từ khóa quan trọng để tìm kiếm thông tin.\n\n"
1064
+ f"Ngữ cảnh: {context_text[:500]}\n\n"
1065
+ "Hãy trích xuất 5-10 từ khóa quan trọng nhất (tiếng Việt) để tìm kiếm.\n"
1066
+ "Yêu cầu trả về JSON với dạng:\n"
1067
+ "{\n"
1068
+ ' "keywords": ["từ", "khóa", "quan", "trọng"]\n'
1069
+ "}\n"
1070
+ "Chỉ in JSON, không thêm lời giải thích khác."
1071
+ )
1072
+
1073
+ raw = self._generate_from_prompt(prompt, llm_mode="keywords")
1074
+ if not raw:
1075
+ return self._fallback_keyword_extraction(query)
1076
+
1077
+ parsed = self._extract_json_payload(raw)
1078
+ if not parsed:
1079
+ return self._fallback_keyword_extraction(query)
1080
+
1081
+ keywords = parsed.get("keywords") or []
1082
+ if isinstance(keywords, list) and len(keywords) > 0:
1083
+ # Filter out stopwords and short words
1084
+ filtered_keywords = [
1085
+ kw.strip().lower()
1086
+ for kw in keywords
1087
+ if kw and len(kw.strip()) > 2
1088
+ ]
1089
+ return filtered_keywords[:10] # Limit to 10 keywords
1090
+
1091
+ return self._fallback_keyword_extraction(query)
1092
+
1093
+ def _fallback_keyword_extraction(self, query: str) -> List[str]:
1094
+ """Fallback keyword extraction using simple rule-based method."""
1095
+ # Simple Vietnamese stopwords
1096
+ stopwords = {
1097
+ "và", "của", "cho", "với", "trong", "là", "có", "được", "bị", "sẽ",
1098
+ "thì", "mà", "này", "đó", "nào", "gì", "như", "về", "từ", "đến",
1099
+ "các", "những", "một", "hai", "ba", "bốn", "năm", "sáu", "bảy", "tám",
1100
+ "chín", "mười", "nhiều", "ít", "rất", "quá", "cũng", "đã", "sẽ",
1101
+ }
1102
+
1103
+ words = query.lower().split()
1104
+ keywords = [
1105
+ w.strip()
1106
+ for w in words
1107
+ if w.strip() not in stopwords and len(w.strip()) > 2
1108
+ ]
1109
+ return keywords[:10]
1110
+
1111
+ def _extract_json_payload(self, raw: str) -> Optional[Dict[str, Any]]:
1112
+ """Best-effort extraction of JSON object from raw LLM text."""
1113
+ if not raw:
1114
+ return None
1115
+ raw = raw.strip()
1116
+ for snippet in (raw, self._slice_to_json(raw)):
1117
+ if not snippet:
1118
+ continue
1119
+ try:
1120
+ return json.loads(snippet)
1121
+ except Exception:
1122
+ continue
1123
+ return None
1124
+
1125
+ def _slice_to_json(self, text: str) -> Optional[str]:
1126
+ start = text.find("{")
1127
+ end = text.rfind("}")
1128
+ if start == -1 or end == -1 or end <= start:
1129
+ return None
1130
+ return text[start : end + 1]
1131
+
1132
+ def generate_structured_legal_answer(
1133
+ self,
1134
+ query: str,
1135
+ documents: List[Any],
1136
+ prefill_summary: Optional[str] = None,
1137
+ ) -> Optional[LegalAnswer]:
1138
+ """
1139
+ Ask the LLM for a structured legal answer (summary + details + citations).
1140
+ """
1141
+ if not self.is_available() or not documents:
1142
+ return None
1143
+
1144
+ parser = get_legal_output_parser()
1145
+ guard = get_legal_guard()
1146
+ retry_hint: Optional[str] = None
1147
+ failure_reason: Optional[str] = None
1148
+
1149
+ for attempt in range(LEGAL_STRUCTURED_MAX_ATTEMPTS):
1150
+ prompt = build_structured_legal_prompt(
1151
+ query,
1152
+ documents,
1153
+ parser,
1154
+ prefill_summary=prefill_summary,
1155
+ retry_hint=retry_hint,
1156
+ )
1157
+ logger.debug(
1158
+ "[LLM] Structured prompt preview (attempt %s): %s",
1159
+ attempt + 1,
1160
+ prompt[:600].replace("\n", " "),
1161
+ )
1162
+ raw_output = self._generate_from_prompt(prompt)
1163
+
1164
+ if not raw_output:
1165
+ failure_reason = "LLM không trả lời"
1166
+ retry_hint = (
1167
+ "Lần trước bạn không trả về JSON nào. "
1168
+ "Hãy in duy nhất một JSON với SUMMARY, DETAILS và CITATIONS."
1169
+ )
1170
+ continue
1171
+
1172
+ _write_guardrails_debug(
1173
+ f"raw_output_attempt_{attempt + 1}",
1174
+ raw_output,
1175
+ )
1176
+ structured: Optional[LegalAnswer] = None
1177
+
1178
+ try:
1179
+ guard_result = guard.parse(llm_output=raw_output)
1180
+ guarded_output = getattr(guard_result, "validated_output", None)
1181
+ if guarded_output:
1182
+ structured = LegalAnswer.parse_obj(guarded_output)
1183
+ _write_guardrails_debug(
1184
+ f"guard_validated_attempt_{attempt + 1}",
1185
+ json.dumps(guarded_output, ensure_ascii=False),
1186
+ )
1187
+ except Exception as exc:
1188
+ failure_reason = f"Guardrails: {exc}"
1189
+ logger.warning("[LLM] Guardrails validation failed: %s", exc)
1190
+ _write_guardrails_debug(
1191
+ f"guard_error_attempt_{attempt + 1}",
1192
+ f"{type(exc).__name__}: {exc}",
1193
+ )
1194
+
1195
+ if not structured:
1196
+ structured = parse_structured_output(parser, raw_output or "")
1197
+ if structured:
1198
+ _write_guardrails_debug(
1199
+ f"parser_recovery_attempt_{attempt + 1}",
1200
+ structured.model_dump_json(indent=None, ensure_ascii=False),
1201
+ )
1202
+ else:
1203
+ retry_hint = (
1204
+ "JSON chưa hợp lệ. Hãy dùng cấu trúc SUMMARY/DETAILS/CITATIONS như ví dụ."
1205
+ )
1206
+ continue
1207
+
1208
+ is_valid, validation_reason = _validate_structured_answer(structured, documents)
1209
+ if is_valid:
1210
+ return structured
1211
+
1212
+ failure_reason = validation_reason or "Không đạt yêu cầu kiểm tra nội dung"
1213
+ logger.warning(
1214
+ "[LLM] ❌ Structured answer failed validation: %s", failure_reason
1215
+ )
1216
+ retry_hint = (
1217
+ f"Lần trước vi phạm: {failure_reason}. "
1218
+ "Hãy dùng đúng tên văn bản và mã điều trong bảng tham chiếu, không bịa thông tin mới."
1219
+ )
1220
+
1221
+ logger.warning(
1222
+ "[LLM] ❌ Structured legal parsing failed sau %s lần. Lý do cuối: %s",
1223
+ LEGAL_STRUCTURED_MAX_ATTEMPTS,
1224
+ failure_reason,
1225
+ )
1226
+ return None
1227
+
1228
+ def _format_document(self, doc: Any) -> str:
1229
+ """Format document for prompt."""
1230
+ doc_type = type(doc).__name__.lower()
1231
+
1232
+ if "fine" in doc_type:
1233
+ parts = [f"Mức phạt: {getattr(doc, 'name', '')}"]
1234
+ if hasattr(doc, 'code') and doc.code:
1235
+ parts.append(f"Mã: {doc.code}")
1236
+ if hasattr(doc, 'min_fine') and hasattr(doc, 'max_fine'):
1237
+ if doc.min_fine and doc.max_fine:
1238
+ parts.append(f"Số tiền: {doc.min_fine:,.0f} - {doc.max_fine:,.0f} VNĐ")
1239
+ return " | ".join(parts)
1240
+
1241
+ elif "procedure" in doc_type:
1242
+ parts = [f"Thủ tục: {getattr(doc, 'title', '')}"]
1243
+ if hasattr(doc, 'dossier') and doc.dossier:
1244
+ parts.append(f"Hồ sơ: {doc.dossier}")
1245
+ if hasattr(doc, 'fee') and doc.fee:
1246
+ parts.append(f"Lệ phí: {doc.fee}")
1247
+ return " | ".join(parts)
1248
+
1249
+ elif "office" in doc_type:
1250
+ parts = [f"Đơn vị: {getattr(doc, 'unit_name', '')}"]
1251
+ if hasattr(doc, 'address') and doc.address:
1252
+ parts.append(f"Địa chỉ: {doc.address}")
1253
+ if hasattr(doc, 'phone') and doc.phone:
1254
+ parts.append(f"Điện thoại: {doc.phone}")
1255
+ return " | ".join(parts)
1256
+
1257
+ elif "advisory" in doc_type:
1258
+ parts = [f"Cảnh báo: {getattr(doc, 'title', '')}"]
1259
+ if hasattr(doc, 'summary') and doc.summary:
1260
+ parts.append(f"Nội dung: {doc.summary[:200]}")
1261
+ return " | ".join(parts)
1262
+
1263
+ elif "legalsection" in doc_type or "legal" in doc_type:
1264
+ parts = []
1265
+ if hasattr(doc, 'section_code') and doc.section_code:
1266
+ parts.append(f"Điều khoản: {doc.section_code}")
1267
+ if hasattr(doc, 'section_title') and doc.section_title:
1268
+ parts.append(f"Tiêu đề: {doc.section_title}")
1269
+ if hasattr(doc, 'document') and doc.document:
1270
+ doc_obj = doc.document
1271
+ if hasattr(doc_obj, 'title'):
1272
+ parts.append(f"Văn bản: {doc_obj.title}")
1273
+ if hasattr(doc_obj, 'code'):
1274
+ parts.append(f"Mã văn bản: {doc_obj.code}")
1275
+ if hasattr(doc, 'content') and doc.content:
1276
+ # Provide longer snippet so LLM has enough context (up to ~1500 chars)
1277
+ max_len = 1500
1278
+ snippet = doc.content[:max_len].strip()
1279
+ if len(doc.content) > max_len:
1280
+ snippet += "..."
1281
+ parts.append(f"Nội dung: {snippet}")
1282
+ return " | ".join(parts) if parts else str(doc)
1283
+
1284
+ return str(doc)
1285
+
1286
+ def _generate_openai(self, prompt: str) -> Optional[str]:
1287
+ """Generate answer using OpenAI."""
1288
+ if not self.client:
1289
+ return None
1290
+
1291
+ try:
1292
+ response = self.client.chat.completions.create(
1293
+ model=os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo"),
1294
+ messages=[
1295
+ {"role": "system", "content": "Bạn là chuyên gia tư vấn về xử lí kỷ luật cán bộ đảng viên của Phòng Thanh Tra - Công An Thành Phố Huế. Bạn giúp người dùng tra cứu các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên."},
1296
+ {"role": "user", "content": prompt}
1297
+ ],
1298
+ temperature=0.7,
1299
+ max_tokens=500
1300
+ )
1301
+ return response.choices[0].message.content
1302
+ except Exception as e:
1303
+ print(f"OpenAI API error: {e}")
1304
+ return None
1305
+
1306
+ def _generate_anthropic(self, prompt: str) -> Optional[str]:
1307
+ """Generate answer using Anthropic Claude."""
1308
+ if not self.client:
1309
+ return None
1310
+
1311
+ try:
1312
+ message = self.client.messages.create(
1313
+ model=os.environ.get("ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022"),
1314
+ max_tokens=500,
1315
+ messages=[
1316
+ {"role": "user", "content": prompt}
1317
+ ]
1318
+ )
1319
+ return message.content[0].text
1320
+ except Exception as e:
1321
+ print(f"Anthropic API error: {e}")
1322
+ return None
1323
+
1324
+ def _generate_ollama(self, prompt: str) -> Optional[str]:
1325
+ """Generate answer using Ollama (local LLM)."""
1326
+ try:
1327
+ import requests
1328
+ model = getattr(self, 'ollama_model', os.environ.get("OLLAMA_MODEL", "qwen2.5:7b"))
1329
+
1330
+ response = requests.post(
1331
+ f"{self.ollama_base_url}/api/generate",
1332
+ json={
1333
+ "model": model,
1334
+ "prompt": prompt,
1335
+ "stream": False,
1336
+ "options": {
1337
+ "temperature": 0.7,
1338
+ "top_p": 0.9,
1339
+ "num_predict": 500
1340
+ }
1341
+ },
1342
+ timeout=60
1343
+ )
1344
+
1345
+ if response.status_code == 200:
1346
+ return response.json().get("response")
1347
+ return None
1348
+ except Exception as e:
1349
+ print(f"Ollama API error: {e}")
1350
+ return None
1351
+
1352
+ def _generate_huggingface(self, prompt: str, mode: str = "answer") -> Optional[str]:
1353
+ """Generate answer using Hugging Face Inference API."""
1354
+ try:
1355
+ import requests
1356
+
1357
+ api_url = f"https://api-inference.huggingface.co/models/{self.hf_model}"
1358
+ headers = {}
1359
+ if hasattr(self, 'hf_api_key') and self.hf_api_key:
1360
+ headers["Authorization"] = f"Bearer {self.hf_api_key}"
1361
+
1362
+ response = requests.post(
1363
+ api_url,
1364
+ headers=headers,
1365
+ json={
1366
+ "inputs": prompt,
1367
+ "parameters": {
1368
+ "temperature": 0.2 if mode == "keywords" else 0.7,
1369
+ "max_new_tokens": 80 if mode == "keywords" else 256,
1370
+ "return_full_text": False
1371
+ }
1372
+ },
1373
+ timeout=60
1374
+ )
1375
+
1376
+ if response.status_code == 200:
1377
+ result = response.json()
1378
+ if isinstance(result, list) and len(result) > 0:
1379
+ return result[0].get("generated_text", "")
1380
+ elif isinstance(result, dict):
1381
+ return result.get("generated_text", "")
1382
+ elif response.status_code == 503:
1383
+ # Model is loading, wait and retry
1384
+ print("⚠️ Model is loading, please wait...")
1385
+ return None
1386
+ else:
1387
+ print(f"Hugging Face API error: {response.status_code} - {response.text}")
1388
+ return None
1389
+ except Exception as e:
1390
+ print(f"Hugging Face API error: {e}")
1391
+ return None
1392
+
1393
+ def _generate_local(self, prompt: str, mode: str = "answer") -> Optional[str]:
1394
+ """Generate answer using local Hugging Face Transformers model."""
1395
+ if self.local_model is None or self.local_tokenizer is None:
1396
+ return None
1397
+
1398
+ try:
1399
+ import torch
1400
+
1401
+ # Format prompt for Qwen models
1402
+ if mode == "keywords":
1403
+ system_content = (
1404
+ "Bạn là trợ lý trích xuất từ khóa. Nhận câu hỏi pháp lý và "
1405
+ "chỉ trả về 5-8 từ khóa tiếng Việt, phân tách bằng dấu phẩy. "
1406
+ "Không viết câu đầy đủ, không thêm lời giải thích."
1407
+ )
1408
+ else:
1409
+ system_content = (
1410
+ "Bạn là chuyên gia tư vấn pháp luật. Trả lời tự nhiên, ngắn gọn, "
1411
+ "dựa trên thông tin đã cho."
1412
+ )
1413
+
1414
+ messages = [
1415
+ {"role": "system", "content": system_content},
1416
+ {"role": "user", "content": prompt},
1417
+ ]
1418
+
1419
+ # Apply chat template if available
1420
+ if hasattr(self.local_tokenizer, "apply_chat_template"):
1421
+ text = self.local_tokenizer.apply_chat_template(
1422
+ messages,
1423
+ tokenize=False,
1424
+ add_generation_prompt=True
1425
+ )
1426
+ else:
1427
+ text = prompt
1428
+
1429
+ # Tokenize
1430
+ inputs = self.local_tokenizer(text, return_tensors="pt")
1431
+
1432
+ # Move to device
1433
+ device = next(self.local_model.parameters()).device
1434
+ inputs = {k: v.to(device) for k, v in inputs.items()}
1435
+
1436
+ # Generate with optimized parameters for faster inference
1437
+ with torch.no_grad():
1438
+ # Use greedy decoding for faster generation (can switch to sampling if needed)
1439
+ outputs = self.local_model.generate(
1440
+ **inputs,
1441
+ max_new_tokens=80 if mode == "keywords" else 256,
1442
+ temperature=0.2 if mode == "keywords" else 0.6,
1443
+ top_p=0.7 if mode == "keywords" else 0.85,
1444
+ do_sample=True,
1445
+ use_cache=True, # Enable KV cache for faster generation
1446
+ pad_token_id=self.local_tokenizer.eos_token_id,
1447
+ repetition_penalty=1.05 if mode == "keywords" else 1.1,
1448
+ )
1449
+
1450
+ # Decode
1451
+ generated_text = self.local_tokenizer.decode(
1452
+ outputs[0][inputs["input_ids"].shape[1]:],
1453
+ skip_special_tokens=True
1454
+ )
1455
+
1456
+ return generated_text.strip()
1457
+
1458
+ except TypeError as e:
1459
+ # Check for Int8Params compatibility error
1460
+ if "_is_hf_initialized" in str(e) or "Int8Params" in str(e):
1461
+ error_msg = (
1462
+ f"[LLM] ❌ Int8Params compatibility error: {e}\n"
1463
+ f"[LLM] 💡 This error occurs when using 8-bit quantization with incompatible library versions.\n"
1464
+ f"[LLM] 💡 Solutions:\n"
1465
+ f"[LLM] 1. Set LOCAL_MODEL_QUANTIZATION=4bit to use 4-bit quantization instead\n"
1466
+ f"[LLM] 2. Set LOCAL_MODEL_QUANTIZATION=none to disable quantization\n"
1467
+ f"[LLM] 3. Use API mode (LLM_PROVIDER=api) to avoid local model issues\n"
1468
+ f"[LLM] 4. Use a smaller model like Qwen/Qwen2.5-1.5B-Instruct"
1469
+ )
1470
+ print(error_msg, flush=True)
1471
+ logger.error(f"[LLM] ❌ Int8Params compatibility error: {e}")
1472
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
1473
+ return None
1474
+ else:
1475
+ # Other TypeError, re-raise to be caught by general handler
1476
+ raise
1477
+ except Exception as e:
1478
+ error_trace = traceback.format_exc()
1479
+ print(f"[LLM] ❌ Local model generation error: {e}", flush=True)
1480
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
1481
+ logger.error(f"[LLM] ❌ Local model generation error: {e}\n{error_trace}")
1482
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
1483
+ traceback.print_exc(file=sys.stderr)
1484
+ return None
1485
+
1486
+ def _generate_llama_cpp(self, prompt: str, mode: str = "answer") -> Optional[str]:
1487
+ """Generate answer using llama.cpp GGUF runtime."""
1488
+ if self.llama_cpp is None:
1489
+ return None
1490
+
1491
+ try:
1492
+ if mode == "keywords":
1493
+ temperature = float(os.environ.get("LLAMA_CPP_TEMPERATURE_KW", "0.2"))
1494
+ top_p = float(os.environ.get("LLAMA_CPP_TOP_P_KW", "0.7"))
1495
+ max_tokens = int(os.environ.get("LLAMA_CPP_MAX_TOKENS_KW", "80"))
1496
+ repeat_penalty = float(os.environ.get("LLAMA_CPP_REPEAT_PENALTY_KW", "1.05"))
1497
+ system_prompt = os.environ.get(
1498
+ "LLAMA_CPP_SYSTEM_PROMPT_KW",
1499
+ (
1500
+ "Bạn là trợ lý trích xuất từ khóa. Nhiệm vụ: nhận câu hỏi pháp lý "
1501
+ "và chỉ trả về 5-8 từ khóa tiếng Việt, phân tách bằng dấu phẩy. "
1502
+ "Không giải thích, không viết câu đầy đủ, không thêm tiền tố/hậu tố."
1503
+ ),
1504
+ )
1505
+ else:
1506
+ temperature = float(os.environ.get("LLAMA_CPP_TEMPERATURE", "0.35"))
1507
+ top_p = float(os.environ.get("LLAMA_CPP_TOP_P", "0.85"))
1508
+ max_tokens = int(os.environ.get("LLAMA_CPP_MAX_TOKENS", "256"))
1509
+ repeat_penalty = float(os.environ.get("LLAMA_CPP_REPEAT_PENALTY", "1.1"))
1510
+ system_prompt = os.environ.get(
1511
+ "LLAMA_CPP_SYSTEM_PROMPT",
1512
+ (
1513
+ "Bạn là chuyên gia tư vấn về xử lí kỷ luật cán bộ đảng viên của "
1514
+ "Phòng Thanh Tra - Công An Thành Phố Huế. Trả lời ngắn gọn, chính "
1515
+ "xác, trích dẫn văn bản và mã điều nếu có."
1516
+ ),
1517
+ )
1518
+
1519
+ response = self.llama_cpp.create_chat_completion(
1520
+ messages=[
1521
+ {"role": "system", "content": system_prompt},
1522
+ {"role": "user", "content": prompt},
1523
+ ],
1524
+ temperature=temperature,
1525
+ top_p=top_p,
1526
+ max_tokens=max_tokens,
1527
+ repeat_penalty=repeat_penalty,
1528
+ stream=False,
1529
+ )
1530
+
1531
+ choices = response.get("choices")
1532
+ if not choices:
1533
+ return None
1534
+ content = choices[0]["message"]["content"]
1535
+ if isinstance(content, list):
1536
+ # llama.cpp may return list of segments
1537
+ content = "".join(segment.get("text", "") for segment in content)
1538
+ if isinstance(content, str):
1539
+ return content.strip()
1540
+ return None
1541
+ except Exception as exc:
1542
+ error_trace = traceback.format_exc()
1543
+ print(f"[LLM] ❌ llama.cpp generation error: {exc}", flush=True)
1544
+ print(f"[LLM] ❌ Trace: {error_trace}", flush=True)
1545
+ logger.error("llama.cpp generation error: %s\n%s", exc, error_trace)
1546
+ return None
1547
+
1548
+ def _generate_api(self, prompt: str, context: Optional[List[Dict[str, Any]]] = None) -> Optional[str]:
1549
+ """Generate answer by calling HF Spaces API.
1550
+
1551
+ Args:
1552
+ prompt: Full prompt including query and documents context.
1553
+ context: Optional conversation context (not used in API mode, handled by HF Spaces).
1554
+ """
1555
+ if not self.api_base_url:
1556
+ return None
1557
+
1558
+ try:
1559
+ import requests
1560
+
1561
+ # Prepare request payload
1562
+ # Send the full prompt (with documents) as the message to HF Spaces
1563
+ # This ensures HF Spaces receives all context from retrieved documents
1564
+ payload = {
1565
+ "message": prompt,
1566
+ "reset_session": False
1567
+ }
1568
+
1569
+ # Only add session_id if we have a valid session context
1570
+ # For now, we'll omit it and let the API generate a new one
1571
+
1572
+ # Add context if available (API may support this in future)
1573
+ # For now, context is handled by the API internally
1574
+
1575
+ # Call API endpoint
1576
+ api_url = f"{self.api_base_url}/chatbot/chat/"
1577
+ print(f"[LLM] 🔗 Calling API: {api_url}", flush=True)
1578
+ print(f"[LLM] 📤 Payload: {payload}", flush=True)
1579
+
1580
+ response = requests.post(
1581
+ api_url,
1582
+ json=payload,
1583
+ headers={"Content-Type": "application/json"},
1584
+ timeout=60
1585
+ )
1586
+
1587
+ print(f"[LLM] 📥 Response status: {response.status_code}", flush=True)
1588
+ print(f"[LLM] 📥 Response headers: {dict(response.headers)}", flush=True)
1589
+
1590
+ if response.status_code == 200:
1591
+ try:
1592
+ result = response.json()
1593
+ print(f"[LLM] 📥 Response JSON: {result}", flush=True)
1594
+ # Extract message from response
1595
+ if isinstance(result, dict):
1596
+ message = result.get("message", None)
1597
+ if message:
1598
+ print(f"[LLM] ✅ Got message from API (length: {len(message)})", flush=True)
1599
+ return message
1600
+ else:
1601
+ print(f"[LLM] ⚠️ Response is not a dict: {type(result)}", flush=True)
1602
+ return None
1603
+ except ValueError as e:
1604
+ print(f"[LLM] ❌ JSON decode error: {e}", flush=True)
1605
+ print(f"[LLM] ❌ Response text: {response.text[:500]}", flush=True)
1606
+ return None
1607
+ elif response.status_code == 503:
1608
+ # Service unavailable - model might be loading
1609
+ print("[LLM] ⚠️ API service is loading, please wait...", flush=True)
1610
+ return None
1611
+ else:
1612
+ print(f"[LLM] ❌ API error: {response.status_code} - {response.text[:500]}", flush=True)
1613
+ return None
1614
+ except requests.exceptions.Timeout:
1615
+ print("[LLM] ❌ API request timeout")
1616
+ return None
1617
+ except requests.exceptions.ConnectionError as e:
1618
+ print(f"[LLM] ❌ API connection error: {e}")
1619
+ return None
1620
+ except Exception as e:
1621
+ error_trace = traceback.format_exc()
1622
+ print(f"[LLM] ❌ API mode error: {e}", flush=True)
1623
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
1624
+ logger.error(f"[LLM] ❌ API mode error: {e}\n{error_trace}")
1625
+ return None
1626
+
1627
+ def summarize_context(self, messages: List[Dict[str, Any]], max_length: int = 200) -> str:
1628
+ """
1629
+ Summarize conversation context.
1630
+
1631
+ Args:
1632
+ messages: List of conversation messages.
1633
+ max_length: Maximum summary length.
1634
+
1635
+ Returns:
1636
+ Summary string.
1637
+ """
1638
+ if not messages:
1639
+ return ""
1640
+
1641
+ # Simple summarization: extract key entities and intents
1642
+ intents = []
1643
+ entities = set()
1644
+
1645
+ for msg in messages:
1646
+ if msg.get("intent"):
1647
+ intents.append(msg["intent"])
1648
+ if msg.get("entities"):
1649
+ for key, value in msg["entities"].items():
1650
+ if isinstance(value, str):
1651
+ entities.add(value)
1652
+ elif isinstance(value, list):
1653
+ entities.update(value)
1654
+
1655
+ summary_parts = []
1656
+ if intents:
1657
+ unique_intents = list(set(intents))
1658
+ summary_parts.append(f"Chủ đề: {', '.join(unique_intents)}")
1659
+ if entities:
1660
+ summary_parts.append(f"Thông tin: {', '.join(list(entities)[:5])}")
1661
+
1662
+ summary = ". ".join(summary_parts)
1663
+ return summary[:max_length] if len(summary) > max_length else summary
1664
+
1665
+ def extract_entities_llm(self, query: str) -> Dict[str, Any]:
1666
+ """
1667
+ Extract entities using LLM.
1668
+
1669
+ Args:
1670
+ query: User query.
1671
+
1672
+ Returns:
1673
+ Dictionary of extracted entities.
1674
+ """
1675
+ if not self.is_available():
1676
+ return {}
1677
+
1678
+ prompt = f"""
1679
+ Trích xuất các thực thể từ câu hỏi sau:
1680
+ "{query}"
1681
+
1682
+ Các loại thực thể cần tìm:
1683
+ - fine_code: Mã vi phạm (V001, V002, ...)
1684
+ - fine_name: Tên vi phạm
1685
+ - procedure_name: Tên thủ tục
1686
+ - office_name: Tên đơn vị
1687
+
1688
+ Trả lời dưới dạng JSON: {{"fine_code": "...", "fine_name": "...", ...}}
1689
+ Nếu không có, trả về {{}}.
1690
+ """
1691
+
1692
+ try:
1693
+ if self.provider == LLM_PROVIDER_OPENAI:
1694
+ response = self._generate_openai(prompt)
1695
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
1696
+ response = self._generate_anthropic(prompt)
1697
+ elif self.provider == LLM_PROVIDER_OLLAMA:
1698
+ response = self._generate_ollama(prompt)
1699
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
1700
+ response = self._generate_huggingface(prompt)
1701
+ elif self.provider == LLM_PROVIDER_LOCAL:
1702
+ response = self._generate_local(prompt)
1703
+ elif self.provider == LLM_PROVIDER_API:
1704
+ # For API mode, we can't extract entities directly
1705
+ # Return empty dict
1706
+ return {}
1707
+ else:
1708
+ return {}
1709
+
1710
+ if response:
1711
+ # Try to extract JSON from response
1712
+ json_match = re.search(r'\{[^}]+\}', response)
1713
+ if json_match:
1714
+ return json.loads(json_match.group())
1715
+ except Exception as e:
1716
+ print(f"Error extracting entities with LLM: {e}")
1717
+
1718
+ return {}
1719
+
1720
+
1721
+ # Global LLM generator instance
1722
+ _llm_generator: Optional[LLMGenerator] = None
1723
+ _last_provider: Optional[str] = None
1724
+
1725
+ def get_llm_generator() -> Optional[LLMGenerator]:
1726
+ """Get or create LLM generator instance.
1727
+
1728
+ Recreates instance only if provider changed (e.g., from local to api).
1729
+ Model is kept alive and reused across requests.
1730
+ """
1731
+ global _llm_generator, _last_provider
1732
+
1733
+ # Get current provider from env
1734
+ current_provider = os.environ.get("LLM_PROVIDER", LLM_PROVIDER).lower()
1735
+
1736
+ # Recreate only if provider changed, instance doesn't exist, or model not available
1737
+ if _llm_generator is None or _last_provider != current_provider or not _llm_generator.is_available():
1738
+ _llm_generator = LLMGenerator()
1739
+ _last_provider = current_provider
1740
+ print(f"[LLM] 🔄 Recreated LLM generator with provider: {current_provider}", flush=True)
1741
+ else:
1742
+ # Model already exists and provider hasn't changed - reuse it
1743
+ print("[LLM] ♻️ Reusing existing LLM generator instance (model kept alive)", flush=True)
1744
+ logger.debug("[LLM] Reusing existing LLM generator instance (model kept alive)")
1745
+
1746
+ return _llm_generator if _llm_generator.is_available() else None
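Note: the clarification and keyword-extraction helpers above all depend on pulling a JSON object out of free-form LLM output via _extract_json_payload / _slice_to_json. A self-contained sketch of that best-effort extraction, mirroring the logic in the module (the function name and sample string below are illustrative only, not part of the commit):

import json
from typing import Any, Dict, Optional

def extract_json_payload(raw: str) -> Optional[Dict[str, Any]]:
    """Best-effort JSON extraction: try the raw text first, then the outermost {...} slice."""
    if not raw:
        return None
    raw = raw.strip()
    start, end = raw.find("{"), raw.rfind("}")
    sliced = raw[start:end + 1] if start != -1 and end > start else None
    for snippet in (raw, sliced):
        if not snippet:
            continue
        try:
            return json.loads(snippet)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
    return None

# Example: tolerates leading prose around the JSON object returned by the LLM.
print(extract_json_payload('Kết quả: {"keywords": ["giấy phép", "mức phạt"]}'))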
backend/hue_portal/chatbot/llm_integration.py.backup ADDED
@@ -0,0 +1,372 @@
1
+ """
2
+ LLM integration for natural answer generation.
3
+ Supports OpenAI GPT, Anthropic Claude, and local LLMs (Ollama).
4
+ """
5
+ import os
6
+ import re
7
+ import json
8
+ from typing import List, Dict, Any, Optional
9
+ try:
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+ except ImportError:
13
+ pass # dotenv is optional
14
+
15
+ # LLM Provider types
16
+ LLM_PROVIDER_OPENAI = "openai"
17
+ LLM_PROVIDER_ANTHROPIC = "anthropic"
18
+ LLM_PROVIDER_OLLAMA = "ollama"
19
+ LLM_PROVIDER_NONE = "none"
20
+
21
+ # Get provider from environment
22
+ LLM_PROVIDER = os.environ.get("LLM_PROVIDER", LLM_PROVIDER_NONE).lower()
23
+
24
+
25
+ class LLMGenerator:
26
+ """Generate natural language answers using LLMs."""
27
+
28
+ def __init__(self, provider: Optional[str] = None):
29
+ """
30
+ Initialize LLM generator.
31
+
32
+ Args:
33
+ provider: LLM provider ('openai', 'anthropic', 'ollama', or None for auto-detect).
34
+ """
35
+ self.provider = provider or LLM_PROVIDER
36
+ self.client = None
37
+ self._initialize_client()
38
+
39
+ def _initialize_client(self):
40
+ """Initialize LLM client based on provider."""
41
+ if self.provider == LLM_PROVIDER_OPENAI:
42
+ try:
43
+ import openai
44
+ api_key = os.environ.get("OPENAI_API_KEY")
45
+ if api_key:
46
+ self.client = openai.OpenAI(api_key=api_key)
47
+ print("✅ OpenAI client initialized")
48
+ else:
49
+ print("⚠️ OPENAI_API_KEY not found, OpenAI disabled")
50
+ except ImportError:
51
+ print("⚠️ openai package not installed, install with: pip install openai")
52
+
53
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
54
+ try:
55
+ import anthropic
56
+ api_key = os.environ.get("ANTHROPIC_API_KEY")
57
+ if api_key:
58
+ self.client = anthropic.Anthropic(api_key=api_key)
59
+ print("✅ Anthropic client initialized")
60
+ else:
61
+ print("⚠️ ANTHROPIC_API_KEY not found, Anthropic disabled")
62
+ except ImportError:
63
+ print("⚠️ anthropic package not installed, install with: pip install anthropic")
64
+
65
+ elif self.provider == LLM_PROVIDER_OLLAMA:
66
+ self.ollama_base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
67
+ print(f"✅ Ollama configured (base_url: {self.ollama_base_url})")
68
+
69
+ else:
70
+ print("ℹ️ No LLM provider configured, using template-based generation")
71
+
72
+ def is_available(self) -> bool:
73
+ """Check if LLM is available."""
74
+ return self.client is not None or self.provider == LLM_PROVIDER_OLLAMA
75
+
76
+ def generate_answer(
77
+ self,
78
+ query: str,
79
+ context: Optional[List[Dict[str, Any]]] = None,
80
+ documents: Optional[List[Any]] = None
81
+ ) -> Optional[str]:
82
+ """
83
+ Generate natural language answer from documents.
84
+
85
+ Args:
86
+ query: User query.
87
+ context: Optional conversation context.
88
+ documents: Retrieved documents.
89
+
90
+ Returns:
91
+ Generated answer or None if LLM not available.
92
+ """
93
+ if not self.is_available():
94
+ return None
95
+
96
+ # Build prompt
97
+ prompt = self._build_prompt(query, context, documents)
98
+
99
+ try:
100
+ if self.provider == LLM_PROVIDER_OPENAI:
101
+ return self._generate_openai(prompt)
102
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
103
+ return self._generate_anthropic(prompt)
104
+ elif self.provider == LLM_PROVIDER_OLLAMA:
105
+ return self._generate_ollama(prompt)
106
+ except Exception as e:
107
+ print(f"Error generating answer with LLM: {e}")
108
+ return None
109
+
110
+ def _build_prompt(
111
+ self,
112
+ query: str,
113
+ context: Optional[List[Dict[str, Any]]],
114
+ documents: Optional[List[Any]]
115
+ ) -> str:
116
+ """Build prompt for LLM."""
117
+ prompt_parts = [
118
+ "Bạn là chatbot tư vấn pháp lý của Công an Thừa Thiên Huế.",
119
+ "Nhiệm vụ: Trả lời câu hỏi của người dùng dựa trên các văn bản pháp luật và quy định được cung cấp.",
120
+ "",
121
+ f"Câu hỏi của người dùng: {query}",
122
+ ""
123
+ ]
124
+
125
+ if context:
126
+ prompt_parts.append("Ngữ cảnh cuộc hội thoại trước đó:")
127
+ for msg in context[-3:]: # Last 3 messages
128
+ role = "Người dùng" if msg.get("role") == "user" else "Bot"
129
+ content = msg.get("content", "")
130
+ prompt_parts.append(f"{role}: {content}")
131
+ prompt_parts.append("")
132
+
133
+ if documents:
134
+ prompt_parts.append("Các văn bản/quy định liên quan:")
135
+ for i, doc in enumerate(documents[:5], 1):
136
+ # Extract relevant fields based on document type
137
+ doc_text = self._format_document(doc)
138
+ prompt_parts.append(f"{i}. {doc_text}")
139
+ prompt_parts.append("")
140
+
141
+ prompt_parts.extend([
142
+ "Yêu cầu QUAN TRỌNG:",
143
+ "- CHỈ trả lời dựa trên thông tin trong 'Các văn bản/quy định liên quan' ở trên",
144
+ "- KHÔNG được tự tạo hoặc suy đoán thông tin không có trong tài liệu",
145
+ "- Nếu thông tin không đủ để trả lời, hãy nói rõ: 'Thông tin trong cơ sở dữ liệu chưa đủ để trả lời câu hỏi này'",
146
+ "- Nếu có mức phạt, phải ghi rõ số tiền (ví dụ: 200.000 - 400.000 VNĐ)",
147
+ "- Nếu có điều khoản, ghi rõ mã điều (ví dụ: Điều 5, Điều 10)",
148
+ "- Nếu có thủ tục, ghi rõ hồ sơ, lệ phí, thời hạn",
149
+ "- Trả lời bằng tiếng Việt, ngắn gọn, dễ hiểu",
150
+ "",
151
+ "Trả lời:"
152
+ ])
153
+
154
+ return "\n".join(prompt_parts)
155
+
156
+ def _format_document(self, doc: Any) -> str:
157
+ """Format document for prompt."""
158
+ doc_type = type(doc).__name__.lower()
159
+
160
+ if "fine" in doc_type:
161
+ parts = [f"Mức phạt: {getattr(doc, 'name', '')}"]
162
+ if hasattr(doc, 'code') and doc.code:
163
+ parts.append(f"Mã: {doc.code}")
164
+ if hasattr(doc, 'min_fine') and hasattr(doc, 'max_fine'):
165
+ if doc.min_fine and doc.max_fine:
166
+ parts.append(f"Số tiền: {doc.min_fine:,.0f} - {doc.max_fine:,.0f} VNĐ")
167
+ return " | ".join(parts)
168
+
169
+ elif "procedure" in doc_type:
170
+ parts = [f"Thủ tục: {getattr(doc, 'title', '')}"]
171
+ if hasattr(doc, 'dossier') and doc.dossier:
172
+ parts.append(f"Hồ sơ: {doc.dossier}")
173
+ if hasattr(doc, 'fee') and doc.fee:
174
+ parts.append(f"Lệ phí: {doc.fee}")
175
+ return " | ".join(parts)
176
+
177
+ elif "office" in doc_type:
178
+ parts = [f"Đơn vị: {getattr(doc, 'unit_name', '')}"]
179
+ if hasattr(doc, 'address') and doc.address:
180
+ parts.append(f"Địa chỉ: {doc.address}")
181
+ if hasattr(doc, 'phone') and doc.phone:
182
+ parts.append(f"Điện thoại: {doc.phone}")
183
+ return " | ".join(parts)
184
+
185
+ elif "advisory" in doc_type:
186
+ parts = [f"Cảnh báo: {getattr(doc, 'title', '')}"]
187
+ if hasattr(doc, 'summary') and doc.summary:
188
+ parts.append(f"Nội dung: {doc.summary[:200]}")
189
+ return " | ".join(parts)
190
+
191
+ elif "legalsection" in doc_type or "legal" in doc_type:
192
+ parts = []
193
+ if hasattr(doc, 'section_code') and doc.section_code:
194
+ parts.append(f"Điều khoản: {doc.section_code}")
195
+ if hasattr(doc, 'section_title') and doc.section_title:
196
+ parts.append(f"Tiêu đề: {doc.section_title}")
197
+ if hasattr(doc, 'document') and doc.document:
198
+ doc_obj = doc.document
199
+ if hasattr(doc_obj, 'title'):
200
+ parts.append(f"Văn bản: {doc_obj.title}")
201
+ if hasattr(doc_obj, 'code'):
202
+ parts.append(f"Mã văn bản: {doc_obj.code}")
203
+ if hasattr(doc, 'content') and doc.content:
204
+ # Truncate content to 300 chars for prompt
205
+ content_short = doc.content[:300] + "..." if len(doc.content) > 300 else doc.content
206
+ parts.append(f"Nội dung: {content_short}")
207
+ return " | ".join(parts) if parts else str(doc)
208
+
209
+ return str(doc)
210
+
211
+ def _generate_openai(self, prompt: str) -> Optional[str]:
212
+ """Generate answer using OpenAI."""
213
+ if not self.client:
214
+ return None
215
+
216
+ try:
217
+ response = self.client.chat.completions.create(
218
+ model=os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo"),
219
+ messages=[
220
+ {"role": "system", "content": "Bạn là chatbot tư vấn chuyên nghiệp."},
221
+ {"role": "user", "content": prompt}
222
+ ],
223
+ temperature=0.7,
224
+ max_tokens=500
225
+ )
226
+ return response.choices[0].message.content
227
+ except Exception as e:
228
+ print(f"OpenAI API error: {e}")
229
+ return None
230
+
231
+ def _generate_anthropic(self, prompt: str) -> Optional[str]:
232
+ """Generate answer using Anthropic Claude."""
233
+ if not self.client:
234
+ return None
235
+
236
+ try:
237
+ message = self.client.messages.create(
238
+ model=os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307"),
239
+ max_tokens=500,
240
+ messages=[
241
+ {"role": "user", "content": prompt}
242
+ ]
243
+ )
244
+ return message.content[0].text
245
+ except Exception as e:
246
+ print(f"Anthropic API error: {e}")
247
+ return None
248
+
249
+ def _generate_ollama(self, prompt: str) -> Optional[str]:
250
+ """Generate answer using Ollama (local LLM)."""
251
+ try:
252
+ import requests
253
+ model = os.environ.get("OLLAMA_MODEL", "gemma3:1b")
254
+
255
+ response = requests.post(
256
+ f"{self.ollama_base_url}/api/generate",
257
+ json={
258
+ "model": model,
259
+ "prompt": prompt,
260
+ "stream": False,
261
+ "options": {
262
+ "temperature": 0.7,
263
+ "top_p": 0.9,
264
+ "num_predict": 500
265
+ }
266
+ },
267
+ timeout=60
268
+ )
269
+
270
+ if response.status_code == 200:
271
+ return response.json().get("response")
272
+ return None
273
+ except Exception as e:
274
+ print(f"Ollama API error: {e}")
275
+ return None
276
+
277
+ def summarize_context(self, messages: List[Dict[str, Any]], max_length: int = 200) -> str:
278
+ """
279
+ Summarize conversation context.
280
+
281
+ Args:
282
+ messages: List of conversation messages.
283
+ max_length: Maximum summary length.
284
+
285
+ Returns:
286
+ Summary string.
287
+ """
288
+ if not messages:
289
+ return ""
290
+
291
+ # Simple summarization: extract key entities and intents
292
+ intents = []
293
+ entities = set()
294
+
295
+ for msg in messages:
296
+ if msg.get("intent"):
297
+ intents.append(msg["intent"])
298
+ if msg.get("entities"):
299
+ for key, value in msg["entities"].items():
300
+ if isinstance(value, str):
301
+ entities.add(value)
302
+ elif isinstance(value, list):
303
+ entities.update(value)
304
+
305
+ summary_parts = []
306
+ if intents:
307
+ unique_intents = list(set(intents))
308
+ summary_parts.append(f"Chủ đề: {', '.join(unique_intents)}")
309
+ if entities:
310
+ summary_parts.append(f"Thông tin: {', '.join(list(entities)[:5])}")
311
+
312
+ summary = ". ".join(summary_parts)
313
+ return summary[:max_length] if len(summary) > max_length else summary
314
+
315
+ def extract_entities_llm(self, query: str) -> Dict[str, Any]:
316
+ """
317
+ Extract entities using LLM.
318
+
319
+ Args:
320
+ query: User query.
321
+
322
+ Returns:
323
+ Dictionary of extracted entities.
324
+ """
325
+ if not self.is_available():
326
+ return {}
327
+
328
+ prompt = f"""
329
+ Trích xuất các thực thể từ câu hỏi sau:
330
+ "{query}"
331
+
332
+ Các loại thực thể cần tìm:
333
+ - fine_code: Mã vi phạm (V001, V002, ...)
334
+ - fine_name: Tên vi phạm
335
+ - procedure_name: Tên thủ tục
336
+ - office_name: Tên đơn vị
337
+
338
+ Trả lời dưới dạng JSON: {{"fine_code": "...", "fine_name": "...", ...}}
339
+ Nếu không có, trả về {{}}.
340
+ """
341
+
342
+ try:
343
+ if self.provider == LLM_PROVIDER_OPENAI:
344
+ response = self._generate_openai(prompt)
345
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
346
+ response = self._generate_anthropic(prompt)
347
+ elif self.provider == LLM_PROVIDER_OLLAMA:
348
+ response = self._generate_ollama(prompt)
349
+ else:
350
+ return {}
351
+
352
+ if response:
353
+ # Try to extract JSON from response
354
+ json_match = re.search(r'\{[^}]+\}', response)
355
+ if json_match:
356
+ return json.loads(json_match.group())
357
+ except Exception as e:
358
+ print(f"Error extracting entities with LLM: {e}")
359
+
360
+ return {}
361
+
362
+
363
+ # Global LLM generator instance
364
+ _llm_generator: Optional[LLMGenerator] = None
365
+
366
+ def get_llm_generator() -> Optional[LLMGenerator]:
367
+ """Get or create LLM generator instance."""
368
+ global _llm_generator
369
+ if _llm_generator is None:
370
+ _llm_generator = LLMGenerator()
371
+ return _llm_generator if _llm_generator.is_available() else None
372
+
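Note: this backup keeps the same Ollama call shape as the current module: a non-streaming POST to /api/generate that reads the "response" field. A minimal standalone sketch of that request, assuming a local Ollama server on its default port (the model name is only an example):

from typing import Optional
import requests

def ollama_generate(prompt: str,
                    model: str = "qwen2.5:7b",
                    base_url: str = "http://localhost:11434") -> Optional[str]:
    """Call Ollama's non-streaming generate endpoint and return its 'response' text."""
    resp = requests.post(
        f"{base_url}/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 500},
        },
        timeout=60,
    )
    return resp.json().get("response") if resp.status_code == 200 else None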
backend/hue_portal/chatbot/llm_integration.py.bak ADDED
@@ -0,0 +1,877 @@
1
+ """
2
+ LLM integration for natural answer generation.
3
+ Supports OpenAI GPT, Anthropic Claude, Ollama, Hugging Face Inference API, Local Hugging Face models, and API mode.
4
+ """
5
+ import os
6
+ import re
7
+ import json
8
+ import sys
9
+ import traceback
10
+ import logging
11
+ import time
12
+ from typing import List, Dict, Any, Optional
13
+ try:
14
+ from dotenv import load_dotenv
15
+ load_dotenv()
16
+ except ImportError:
17
+ pass # dotenv is optional
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Import download progress tracker (optional)
22
+ try:
23
+ from .download_progress import get_progress_tracker, DownloadProgress
24
+ PROGRESS_TRACKER_AVAILABLE = True
25
+ except ImportError:
26
+ PROGRESS_TRACKER_AVAILABLE = False
27
+ logger.warning("Download progress tracker not available")
28
+
29
+ # LLM Provider types
30
+ LLM_PROVIDER_OPENAI = "openai"
31
+ LLM_PROVIDER_ANTHROPIC = "anthropic"
32
+ LLM_PROVIDER_OLLAMA = "ollama"
33
+ LLM_PROVIDER_HUGGINGFACE = "huggingface" # Hugging Face Inference API
34
+ LLM_PROVIDER_LOCAL = "local" # Local Hugging Face Transformers model
35
+ LLM_PROVIDER_API = "api" # API mode - call HF Spaces API
36
+ LLM_PROVIDER_NONE = "none"
37
+
38
+ # Get provider from environment (default to local Qwen if none provided)
39
+ DEFAULT_LLM_PROVIDER = os.environ.get("DEFAULT_LLM_PROVIDER", LLM_PROVIDER_LOCAL).lower()
40
+ env_provider = os.environ.get("LLM_PROVIDER", "").strip().lower()
41
+ LLM_PROVIDER = env_provider or DEFAULT_LLM_PROVIDER
42
+
43
+
44
+ class LLMGenerator:
45
+ """Generate natural language answers using LLMs."""
46
+
47
+ def __init__(self, provider: Optional[str] = None):
48
+ """
49
+ Initialize LLM generator.
50
+
51
+ Args:
52
+ provider: LLM provider ('openai', 'anthropic', 'ollama', 'local', 'huggingface', 'api', or None for auto-detect).
53
+ """
54
+ self.provider = provider or LLM_PROVIDER
55
+ self.client = None
56
+ self.local_model = None
57
+ self.local_tokenizer = None
58
+ self.api_base_url = None
59
+ self._initialize_client()
60
+
61
+ def _initialize_client(self):
62
+ """Initialize LLM client based on provider."""
63
+ if self.provider == LLM_PROVIDER_OPENAI:
64
+ try:
65
+ import openai
66
+ api_key = os.environ.get("OPENAI_API_KEY")
67
+ if api_key:
68
+ self.client = openai.OpenAI(api_key=api_key)
69
+ print("✅ OpenAI client initialized")
70
+ else:
71
+ print("⚠️ OPENAI_API_KEY not found, OpenAI disabled")
72
+ except ImportError:
73
+ print("⚠️ openai package not installed, install with: pip install openai")
74
+
75
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
76
+ try:
77
+ import anthropic
78
+ api_key = os.environ.get("ANTHROPIC_API_KEY")
79
+ if api_key:
80
+ self.client = anthropic.Anthropic(api_key=api_key)
81
+ print("✅ Anthropic client initialized")
82
+ else:
83
+ print("⚠️ ANTHROPIC_API_KEY not found, Anthropic disabled")
84
+ except ImportError:
85
+ print("⚠️ anthropic package not installed, install with: pip install anthropic")
86
+
87
+ elif self.provider == LLM_PROVIDER_OLLAMA:
88
+ self.ollama_base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
89
+ self.ollama_model = os.environ.get("OLLAMA_MODEL", "qwen2.5:7b")
90
+ print(f"✅ Ollama configured (base_url: {self.ollama_base_url}, model: {self.ollama_model})")
91
+
92
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
93
+ self.hf_api_key = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
94
+ self.hf_model = os.environ.get("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
95
+ if self.hf_api_key:
96
+ print(f"✅ Hugging Face API configured (model: {self.hf_model})")
97
+ else:
98
+ print("⚠️ HF_TOKEN not found, Hugging Face may have rate limits")
99
+
100
+ elif self.provider == LLM_PROVIDER_API:
101
+ # API mode - call HF Spaces API
102
+ self.api_base_url = os.environ.get(
103
+ "HF_API_BASE_URL",
104
+ "https://davidtran999-hue-portal-backend.hf.space/api"
105
+ )
106
+ print(f"✅ API mode configured (base_url: {self.api_base_url})")
107
+
108
+ elif self.provider == LLM_PROVIDER_LOCAL:
109
+ self._initialize_local_model()
110
+
111
+ else:
112
+ print("ℹ️ No LLM provider configured, using template-based generation")
113
+
114
+ def _initialize_local_model(self):
115
+ """Initialize local Hugging Face Transformers model."""
116
+ try:
117
+ from transformers import AutoModelForCausalLM, AutoTokenizer
118
+ import torch
119
+
120
+ # Default to Qwen 2.5 7B with 8-bit quantization (fits in GPU RAM)
121
+ model_path = os.environ.get("LOCAL_MODEL_PATH", "Qwen/Qwen2.5-7B-Instruct")
122
+ device = os.environ.get("LOCAL_MODEL_DEVICE", "auto") # auto, cpu, cuda
123
+
124
+ print(f"[LLM] Loading local model: {model_path}", flush=True)
125
+ logger.info(f"[LLM] Loading local model: {model_path}")
126
+
127
+ # Determine device
128
+ if device == "auto":
129
+ device = "cuda" if torch.cuda.is_available() else "cpu"
130
+
131
+ # Start cache monitoring for download progress (optional)
132
+ try:
133
+ from .cache_monitor import get_cache_monitor
134
+ monitor = get_cache_monitor()
135
+ monitor.start_monitoring(model_path, interval=2.0)
136
+ print(f"[LLM] 📊 Started cache monitoring for {model_path}", flush=True)
137
+ logger.info(f"[LLM] 📊 Started cache monitoring for {model_path}")
138
+ except Exception as e:
139
+ logger.warning(f"Could not start cache monitoring: {e}")
140
+
141
+ # Load tokenizer
142
+ print("[LLM] Loading tokenizer...", flush=True)
143
+ logger.info("[LLM] Loading tokenizer...")
144
+ try:
145
+ self.local_tokenizer = AutoTokenizer.from_pretrained(
146
+ model_path,
147
+ trust_remote_code=True
148
+ )
149
+ print("[LLM] ✅ Tokenizer loaded successfully", flush=True)
150
+ logger.info("[LLM] ✅ Tokenizer loaded successfully")
151
+ except Exception as tokenizer_err:
152
+ error_trace = traceback.format_exc()
153
+ print(f"[LLM] ❌ Tokenizer load error: {tokenizer_err}", flush=True)
154
+ print(f"[LLM] ❌ Tokenizer trace: {error_trace}", flush=True)
155
+ logger.error(f"[LLM] ❌ Tokenizer load error: {tokenizer_err}\n{error_trace}")
156
+ print(f"[LLM] ❌ ERROR: {type(tokenizer_err).__name__}: {str(tokenizer_err)}", file=sys.stderr, flush=True)
157
+ traceback.print_exc(file=sys.stderr)
158
+ raise
159
+
160
+ # Load model with optional quantization and fallback mechanism
161
+ print(f"[LLM] Loading model to {device}...", flush=True)
162
+ logger.info(f"[LLM] Loading model to {device}...")
163
+
164
+ # Check for quantization config
165
+ # Default to 8-bit for 7B (better thinking), 4-bit for larger models
166
+ default_8bit = "7b" in model_path.lower() or "7B" in model_path
167
+ default_4bit = ("32b" in model_path.lower() or "32B" in model_path or "14b" in model_path.lower() or "14B" in model_path) and not default_8bit
168
+
169
+ # Check environment variable for explicit quantization preference
170
+ quantization_pref = os.environ.get("LOCAL_MODEL_QUANTIZATION", "").lower()
171
+ if quantization_pref == "4bit":
172
+ use_8bit = False
173
+ use_4bit = True
174
+ elif quantization_pref == "8bit":
175
+ use_8bit = True
176
+ use_4bit = False
177
+ elif quantization_pref == "none":
178
+ use_8bit = False
179
+ use_4bit = False
180
+ else:
181
+ # Use defaults based on model size
182
+ use_8bit = os.environ.get("LOCAL_MODEL_8BIT", "true" if default_8bit else "false").lower() == "true"
183
+ use_4bit = os.environ.get("LOCAL_MODEL_4BIT", "true" if default_4bit else "false").lower() == "true"
184
+
185
+ # Try loading with fallback: 8-bit → 4-bit → float16
186
+ model_loaded = False
187
+ quantization_attempts = []
188
+
189
+ if device == "cuda":
190
+ # Attempt 1: Try 8-bit quantization (if requested)
191
+ if use_8bit:
192
+ quantization_attempts.append(("8-bit", True, False))
193
+
194
+ # Attempt 2: Try 4-bit quantization (if 8-bit fails or not requested)
195
+ if use_4bit or (use_8bit and not model_loaded):
196
+ quantization_attempts.append(("4-bit", False, True))
197
+
198
+ # Attempt 3: Fallback to float16 (no quantization)
199
+ quantization_attempts.append(("float16", False, False))
200
+ else:
201
+ # CPU: only float32
202
+ quantization_attempts.append(("float32", False, False))
203
+
204
+ last_error = None
205
+ for attempt_name, try_8bit, try_4bit in quantization_attempts:
206
+ if model_loaded:
207
+ break
208
+
209
+ try:
210
+ load_kwargs = {
211
+ "trust_remote_code": True,
212
+ "low_cpu_mem_usage": True,
213
+ }
214
+
215
+ if device == "cuda":
216
+ load_kwargs["device_map"] = "auto"
217
+
218
+ if try_4bit:
219
+ from transformers import BitsAndBytesConfig
220
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
221
+ load_in_4bit=True,
222
+ bnb_4bit_compute_dtype=torch.float16
223
+ )
224
+ print(f"[LLM] Attempting to load with 4-bit quantization (~4-5GB VRAM for 7B)", flush=True)
225
+ elif try_8bit:
226
+ from transformers import BitsAndBytesConfig
227
+ # Fixed: Remove CPU offload to avoid Int8Params compatibility issue
228
+ load_kwargs["quantization_config"] = BitsAndBytesConfig(
229
+ load_in_8bit=True,
230
+ llm_int8_threshold=6.0
231
+ # Removed: llm_int8_enable_fp32_cpu_offload=True (causes compatibility issues)
232
+ )
233
+ # Removed: max_memory override - let accelerate handle it automatically
234
+ print(f"[LLM] Attempting to load with 8-bit quantization (~7GB VRAM for 7B)", flush=True)
235
+ else:
236
+ load_kwargs["torch_dtype"] = torch.float16
237
+ print(f"[LLM] Attempting to load with float16 (no quantization)", flush=True)
238
+ else:
239
+ load_kwargs["torch_dtype"] = torch.float32
240
+ print(f"[LLM] Attempting to load with float32 (CPU)", flush=True)
241
+
242
+ # Load model
243
+ self.local_model = AutoModelForCausalLM.from_pretrained(
244
+ model_path,
245
+ **load_kwargs
246
+ )
247
+
248
+ # Stop cache monitoring (download complete)
249
+ try:
250
+ from .cache_monitor import get_cache_monitor
251
+ monitor = get_cache_monitor()
252
+ monitor.stop_monitoring(model_path)
253
+ print(f"[LLM] ✅ Model download complete, stopped monitoring", flush=True)
254
+ except:
255
+ pass
256
+
257
+ print(f"[LLM] ✅ Model loaded successfully with {attempt_name} quantization", flush=True)
258
+ logger.info(f"[LLM] ✅ Model loaded successfully with {attempt_name} quantization")
259
+ model_loaded = True
260
+
261
+ except Exception as model_load_err:
262
+ last_error = model_load_err
263
+ error_trace = traceback.format_exc()
264
+ print(f"[LLM] ⚠️ Failed to load with {attempt_name}: {model_load_err}", flush=True)
265
+ logger.warning(f"[LLM] ⚠️ Failed to load with {attempt_name}: {model_load_err}")
266
+
267
+ # If this was the last attempt, raise the error
268
+ if attempt_name == quantization_attempts[-1][0]:
269
+ print(f"[LLM] ❌ All quantization attempts failed. Last error: {model_load_err}", flush=True)
270
+ print(f"[LLM] ❌ Model load trace: {error_trace}", flush=True)
271
+ logger.error(f"[LLM] ❌ Model load error: {model_load_err}\n{error_trace}")
272
+ print(f"[LLM] ❌ ERROR: {type(model_load_err).__name__}: {str(model_load_err)}", file=sys.stderr, flush=True)
273
+ traceback.print_exc(file=sys.stderr)
274
+ raise
275
+ else:
276
+ # Try next quantization method
277
+ print(f"[LLM] 🔄 Falling back to next quantization method...", flush=True)
278
+ continue
279
+
280
+ if not model_loaded:
281
+ raise RuntimeError("Failed to load model with any quantization method")
282
+
283
+ if device == "cpu":
284
+ try:
285
+ self.local_model = self.local_model.to(device)
286
+ print(f"[LLM] ✅ Model moved to {device}", flush=True)
287
+ logger.info(f"[LLM] ✅ Model moved to {device}")
288
+ except Exception as move_err:
289
+ error_trace = traceback.format_exc()
290
+ print(f"[LLM] ❌ Model move error: {move_err}", flush=True)
291
+ logger.error(f"[LLM] ❌ Model move error: {move_err}\n{error_trace}")
292
+ print(f"[LLM] ❌ ERROR: {type(move_err).__name__}: {str(move_err)}", file=sys.stderr, flush=True)
293
+ traceback.print_exc(file=sys.stderr)
294
+
295
+ self.local_model.eval() # Set to evaluation mode
296
+ print(f"[LLM] ✅ Local model loaded successfully on {device}", flush=True)
297
+ logger.info(f"[LLM] ✅ Local model loaded successfully on {device}")
298
+
299
+ except ImportError as import_err:
300
+ error_msg = "transformers package not installed, install with: pip install transformers torch"
301
+ print(f"[LLM] ⚠️ {error_msg}", flush=True)
302
+ logger.warning(f"[LLM] ⚠️ {error_msg}")
303
+ print(f"[LLM] ❌ ImportError: {import_err}", file=sys.stderr, flush=True)
304
+ self.local_model = None
305
+ self.local_tokenizer = None
306
+ except Exception as e:
307
+ error_trace = traceback.format_exc()
308
+ print(f"[LLM] ❌ Error loading local model: {e}", flush=True)
309
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
310
+ logger.error(f"[LLM] ❌ Error loading local model: {e}\n{error_trace}")
311
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
312
+ traceback.print_exc(file=sys.stderr)
313
+ print("[LLM] 💡 Tip: Use smaller models like Qwen/Qwen2.5-1.5B-Instruct or Qwen/Qwen2.5-0.5B-Instruct", flush=True)
314
+ self.local_model = None
315
+ self.local_tokenizer = None
316
+
317
+ def is_available(self) -> bool:
318
+ """Check if LLM is available."""
319
+ return (
320
+ self.client is not None or
321
+ self.provider == LLM_PROVIDER_OLLAMA or
322
+ self.provider == LLM_PROVIDER_HUGGINGFACE or
323
+ self.provider == LLM_PROVIDER_API or
324
+ (self.provider == LLM_PROVIDER_LOCAL and self.local_model is not None)
325
+ )
326
+
327
+ def generate_answer(
328
+ self,
329
+ query: str,
330
+ context: Optional[List[Dict[str, Any]]] = None,
331
+ documents: Optional[List[Any]] = None
332
+ ) -> Optional[str]:
333
+ """
334
+ Generate natural language answer from documents.
335
+
336
+ Args:
337
+ query: User query.
338
+ context: Optional conversation context.
339
+ documents: Retrieved documents.
340
+
341
+ Returns:
342
+ Generated answer or None if LLM not available.
343
+ """
344
+ if not self.is_available():
345
+ return None
346
+
347
+ # Build prompt
348
+ prompt = self._build_prompt(query, context, documents)
349
+
350
+ try:
351
+ print(f"[LLM] Generating answer with provider: {self.provider}", flush=True)
352
+ logger.info(f"[LLM] Generating answer with provider: {self.provider}")
353
+
354
+ if self.provider == LLM_PROVIDER_OPENAI:
355
+ result = self._generate_openai(prompt)
356
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
357
+ result = self._generate_anthropic(prompt)
358
+ elif self.provider == LLM_PROVIDER_OLLAMA:
359
+ result = self._generate_ollama(prompt)
360
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
361
+ result = self._generate_huggingface(prompt)
362
+ elif self.provider == LLM_PROVIDER_LOCAL:
363
+ result = self._generate_local(prompt)
364
+ elif self.provider == LLM_PROVIDER_API:
365
+ # For API mode, send the full prompt (with documents) as the message
366
+ # This ensures HF Spaces receives all context from retrieved documents
367
+ result = self._generate_api(prompt, context)
368
+ else:
369
+ result = None
370
+
371
+ if result:
372
+ print(f"[LLM] ✅ Answer generated successfully (length: {len(result)})", flush=True)
373
+ logger.info(f"[LLM] ✅ Answer generated successfully (length: {len(result)})")
374
+ else:
375
+ print(f"[LLM] ⚠️ No answer generated", flush=True)
376
+ logger.warning("[LLM] ⚠️ No answer generated")
377
+
378
+ return result
379
+ except Exception as e:
380
+ error_trace = traceback.format_exc()
381
+ print(f"[LLM] ❌ Error generating answer: {e}", flush=True)
382
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
383
+ logger.error(f"[LLM] ❌ Error generating answer: {e}\n{error_trace}")
384
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
385
+ traceback.print_exc(file=sys.stderr)
386
+ return None
387
+
388
+ def _build_prompt(
389
+ self,
390
+ query: str,
391
+ context: Optional[List[Dict[str, Any]]],
392
+ documents: Optional[List[Any]]
393
+ ) -> str:
394
+ """Build prompt for LLM."""
395
+ prompt_parts = [
396
+ "Bạn là chatbot tư vấn pháp lý của Công an Thừa Thiên Huế.",
397
+ "Nhiệm vụ: Trả lời câu hỏi của người dùng dựa trên các văn bản pháp luật và quy định được cung cấp.",
398
+ "",
399
+ f"Câu hỏi của người dùng: {query}",
400
+ ""
401
+ ]
402
+
403
+ if context:
404
+ prompt_parts.append("Ngữ cảnh cuộc hội thoại trước đó:")
405
+ for msg in context[-3:]: # Last 3 messages
406
+ role = "Người dùng" if msg.get("role") == "user" else "Bot"
407
+ content = msg.get("content", "")
408
+ prompt_parts.append(f"{role}: {content}")
409
+ prompt_parts.append("")
410
+
411
+ if documents:
412
+ prompt_parts.append("Các văn bản/quy định liên quan:")
413
+ for i, doc in enumerate(documents[:5], 1):
414
+ # Extract relevant fields based on document type
415
+ doc_text = self._format_document(doc)
416
+ prompt_parts.append(f"{i}. {doc_text}")
417
+ prompt_parts.append("")
418
+ # If documents exist, require strict adherence
419
+ prompt_parts.extend([
420
+ "Yêu cầu QUAN TRỌNG:",
421
+ "- CHỈ trả lời dựa trên thông tin trong 'Các văn bản/quy định liên quan' ở trên",
422
+ "- KHÔNG được tự tạo hoặc suy đoán thông tin không có trong tài liệu",
423
+ "- Nếu thông tin không đủ để trả lời, hãy nói rõ: 'Thông tin trong cơ sở dữ liệu chưa đủ để trả lời câu hỏi này'",
424
+ "- Nếu có mức phạt, phải ghi rõ số tiền (ví dụ: 200.000 - 400.000 VNĐ)",
425
+ "- Nếu có điều khoản, ghi rõ mã điều (ví dụ: Điều 5, Điều 10)",
426
+ "- Nếu có thủ tục, ghi rõ hồ sơ, lệ phí, thời hạn",
427
+ "- Trả lời bằng tiếng Việt, ngắn gọn, dễ hiểu",
428
+ "",
429
+ "Trả lời:"
430
+ ])
431
+ else:
432
+ # No documents - allow general conversation
433
+ prompt_parts.extend([
434
+ "Yêu cầu:",
435
+ "- Trả lời câu hỏi một cách tự nhiên và hữu ích như một chatbot AI thông thường",
436
+ "- Nếu câu hỏi liên quan đến pháp luật, thủ tục, mức phạt nhưng không có thông tin trong cơ sở dữ liệu, hãy nói: 'Tôi không tìm thấy thông tin này trong cơ sở dữ liệu. Bạn có thể liên hệ trực tiếp với Công an Thừa Thiên Huế để được tư vấn chi tiết hơn.'",
437
+ "- Trả lời bằng tiếng Việt, thân thiện, ngắn gọn, dễ hiểu",
438
+ "",
439
+ "Trả lời:"
440
+ ])
441
+
442
+ return "\n".join(prompt_parts)
443
+
444
+ def _format_document(self, doc: Any) -> str:
445
+ """Format document for prompt."""
446
+ doc_type = type(doc).__name__.lower()
447
+
448
+ if "fine" in doc_type:
449
+ parts = [f"Mức phạt: {getattr(doc, 'name', '')}"]
450
+ if hasattr(doc, 'code') and doc.code:
451
+ parts.append(f"Mã: {doc.code}")
452
+ if hasattr(doc, 'min_fine') and hasattr(doc, 'max_fine'):
453
+ if doc.min_fine and doc.max_fine:
454
+ parts.append(f"Số tiền: {doc.min_fine:,.0f} - {doc.max_fine:,.0f} VNĐ")
455
+ return " | ".join(parts)
456
+
457
+ elif "procedure" in doc_type:
458
+ parts = [f"Thủ tục: {getattr(doc, 'title', '')}"]
459
+ if hasattr(doc, 'dossier') and doc.dossier:
460
+ parts.append(f"Hồ sơ: {doc.dossier}")
461
+ if hasattr(doc, 'fee') and doc.fee:
462
+ parts.append(f"Lệ phí: {doc.fee}")
463
+ return " | ".join(parts)
464
+
465
+ elif "office" in doc_type:
466
+ parts = [f"Đơn vị: {getattr(doc, 'unit_name', '')}"]
467
+ if hasattr(doc, 'address') and doc.address:
468
+ parts.append(f"Địa chỉ: {doc.address}")
469
+ if hasattr(doc, 'phone') and doc.phone:
470
+ parts.append(f"Điện thoại: {doc.phone}")
471
+ return " | ".join(parts)
472
+
473
+ elif "advisory" in doc_type:
474
+ parts = [f"Cảnh báo: {getattr(doc, 'title', '')}"]
475
+ if hasattr(doc, 'summary') and doc.summary:
476
+ parts.append(f"Nội dung: {doc.summary[:200]}")
477
+ return " | ".join(parts)
478
+
479
+ elif "legalsection" in doc_type or "legal" in doc_type:
480
+ parts = []
481
+ if hasattr(doc, 'section_code') and doc.section_code:
482
+ parts.append(f"Điều khoản: {doc.section_code}")
483
+ if hasattr(doc, 'section_title') and doc.section_title:
484
+ parts.append(f"Tiêu đề: {doc.section_title}")
485
+ if hasattr(doc, 'document') and doc.document:
486
+ doc_obj = doc.document
487
+ if hasattr(doc_obj, 'title'):
488
+ parts.append(f"Văn bản: {doc_obj.title}")
489
+ if hasattr(doc_obj, 'code'):
490
+ parts.append(f"Mã văn bản: {doc_obj.code}")
491
+ if hasattr(doc, 'content') and doc.content:
492
+ # Truncate content to 300 chars for prompt
493
+ content_short = doc.content[:300] + "..." if len(doc.content) > 300 else doc.content
494
+ parts.append(f"Nội dung: {content_short}")
495
+ return " | ".join(parts) if parts else str(doc)
496
+
497
+ return str(doc)
498
+
499
+ def _generate_openai(self, prompt: str) -> Optional[str]:
500
+ """Generate answer using OpenAI."""
501
+ if not self.client:
502
+ return None
503
+
504
+ try:
505
+ response = self.client.chat.completions.create(
506
+ model=os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo"),
507
+ messages=[
508
+ {"role": "system", "content": "Bạn là chatbot tư vấn chuyên nghiệp."},
509
+ {"role": "user", "content": prompt}
510
+ ],
511
+ temperature=0.7,
512
+ max_tokens=500
513
+ )
514
+ return response.choices[0].message.content
515
+ except Exception as e:
516
+ print(f"OpenAI API error: {e}")
517
+ return None
518
+
519
+ def _generate_anthropic(self, prompt: str) -> Optional[str]:
520
+ """Generate answer using Anthropic Claude."""
521
+ if not self.client:
522
+ return None
523
+
524
+ try:
525
+ message = self.client.messages.create(
526
+ model=os.environ.get("ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022"),
527
+ max_tokens=500,
528
+ messages=[
529
+ {"role": "user", "content": prompt}
530
+ ]
531
+ )
532
+ return message.content[0].text
533
+ except Exception as e:
534
+ print(f"Anthropic API error: {e}")
535
+ return None
536
+
537
+ def _generate_ollama(self, prompt: str) -> Optional[str]:
538
+ """Generate answer using Ollama (local LLM)."""
539
+ try:
540
+ import requests
541
+ model = getattr(self, 'ollama_model', os.environ.get("OLLAMA_MODEL", "qwen2.5:7b"))
542
+
543
+ response = requests.post(
544
+ f"{self.ollama_base_url}/api/generate",
545
+ json={
546
+ "model": model,
547
+ "prompt": prompt,
548
+ "stream": False,
549
+ "options": {
550
+ "temperature": 0.7,
551
+ "top_p": 0.9,
552
+ "num_predict": 500
553
+ }
554
+ },
555
+ timeout=60
556
+ )
557
+
558
+ if response.status_code == 200:
559
+ return response.json().get("response")
560
+ return None
561
+ except Exception as e:
562
+ print(f"Ollama API error: {e}")
563
+ return None
564
+
565
+ def _generate_huggingface(self, prompt: str) -> Optional[str]:
566
+ """Generate answer using Hugging Face Inference API."""
567
+ try:
568
+ import requests
569
+
570
+ api_url = f"https://api-inference.huggingface.co/models/{self.hf_model}"
571
+ headers = {}
572
+ if hasattr(self, 'hf_api_key') and self.hf_api_key:
573
+ headers["Authorization"] = f"Bearer {self.hf_api_key}"
574
+
575
+ response = requests.post(
576
+ api_url,
577
+ headers=headers,
578
+ json={
579
+ "inputs": prompt,
580
+ "parameters": {
581
+ "temperature": 0.7,
582
+ "max_new_tokens": 500,
583
+ "return_full_text": False
584
+ }
585
+ },
586
+ timeout=60
587
+ )
588
+
589
+ if response.status_code == 200:
590
+ result = response.json()
591
+ if isinstance(result, list) and len(result) > 0:
592
+ return result[0].get("generated_text", "")
593
+ elif isinstance(result, dict):
594
+ return result.get("generated_text", "")
595
+ elif response.status_code == 503:
596
+ # Model is loading, wait and retry
597
+ print("⚠️ Model is loading, please wait...")
598
+ return None
599
+ else:
600
+ print(f"Hugging Face API error: {response.status_code} - {response.text}")
601
+ return None
602
+ except Exception as e:
603
+ print(f"Hugging Face API error: {e}")
604
+ return None
605
+
606
+ def _generate_local(self, prompt: str) -> Optional[str]:
607
+ """Generate answer using local Hugging Face Transformers model."""
608
+ if self.local_model is None or self.local_tokenizer is None:
609
+ return None
610
+
611
+ try:
612
+ import torch
613
+
614
+ # Format prompt for Qwen models
615
+ messages = [
616
+ {"role": "system", "content": "Bạn là chatbot tư vấn chuyên nghiệp."},
617
+ {"role": "user", "content": prompt}
618
+ ]
619
+
620
+ # Apply chat template if available
621
+ if hasattr(self.local_tokenizer, "apply_chat_template"):
622
+ text = self.local_tokenizer.apply_chat_template(
623
+ messages,
624
+ tokenize=False,
625
+ add_generation_prompt=True
626
+ )
627
+ else:
628
+ text = prompt
629
+
630
+ # Tokenize
631
+ inputs = self.local_tokenizer(text, return_tensors="pt")
632
+
633
+ # Move to device
634
+ device = next(self.local_model.parameters()).device
635
+ inputs = {k: v.to(device) for k, v in inputs.items()}
636
+
637
+ # Generate
638
+ with torch.no_grad():
639
+ outputs = self.local_model.generate(
640
+ **inputs,
641
+ max_new_tokens=500,
642
+ temperature=0.7,
643
+ top_p=0.9,
644
+ do_sample=True,
645
+ pad_token_id=self.local_tokenizer.eos_token_id
646
+ )
647
+
648
+ # Decode
649
+ generated_text = self.local_tokenizer.decode(
650
+ outputs[0][inputs["input_ids"].shape[1]:],
651
+ skip_special_tokens=True
652
+ )
653
+
654
+ return generated_text.strip()
655
+
656
+ except TypeError as e:
657
+ # Check for Int8Params compatibility error
658
+ if "_is_hf_initialized" in str(e) or "Int8Params" in str(e):
659
+ error_msg = (
660
+ f"[LLM] ❌ Int8Params compatibility error: {e}\n"
661
+ f"[LLM] 💡 This error occurs when using 8-bit quantization with incompatible library versions.\n"
662
+ f"[LLM] 💡 Solutions:\n"
663
+ f"[LLM] 1. Set LOCAL_MODEL_QUANTIZATION=4bit to use 4-bit quantization instead\n"
664
+ f"[LLM] 2. Set LOCAL_MODEL_QUANTIZATION=none to disable quantization\n"
665
+ f"[LLM] 3. Use API mode (LLM_PROVIDER=api) to avoid local model issues\n"
666
+ f"[LLM] 4. Use a smaller model like Qwen/Qwen2.5-1.5B-Instruct"
667
+ )
668
+ print(error_msg, flush=True)
669
+ logger.error(f"[LLM] ❌ Int8Params compatibility error: {e}")
670
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
671
+ return None
672
+ else:
673
+ # Other TypeError, re-raise to be caught by general handler
674
+ raise
675
+ except Exception as e:
676
+ error_trace = traceback.format_exc()
677
+ print(f"[LLM] ❌ Local model generation error: {e}", flush=True)
678
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
679
+ logger.error(f"[LLM] ❌ Local model generation error: {e}\n{error_trace}")
680
+ print(f"[LLM] ❌ ERROR: {type(e).__name__}: {str(e)}", file=sys.stderr, flush=True)
681
+ traceback.print_exc(file=sys.stderr)
682
+ return None
683
+
684
+ def _generate_api(self, prompt: str, context: Optional[List[Dict[str, Any]]] = None) -> Optional[str]:
685
+ """Generate answer by calling HF Spaces API.
686
+
687
+ Args:
688
+ prompt: Full prompt including query and documents context.
689
+ context: Optional conversation context (not used in API mode, handled by HF Spaces).
690
+ """
691
+ if not self.api_base_url:
692
+ return None
693
+
694
+ try:
695
+ import requests
696
+
697
+ # Prepare request payload
698
+ # Send the full prompt (with documents) as the message to HF Spaces
699
+ # This ensures HF Spaces receives all context from retrieved documents
700
+ payload = {
701
+ "message": prompt,
702
+ "reset_session": False
703
+ }
704
+
705
+ # Only add session_id if we have a valid session context
706
+ # For now, we'll omit it and let the API generate a new one
707
+
708
+ # Add context if available (API may support this in future)
709
+ # For now, context is handled by the API internally
710
+
711
+ # Call API endpoint
712
+ api_url = f"{self.api_base_url}/chatbot/chat/"
713
+ print(f"[LLM] 🔗 Calling API: {api_url}", flush=True)
714
+ print(f"[LLM] 📤 Payload: {payload}", flush=True)
715
+
716
+ response = requests.post(
717
+ api_url,
718
+ json=payload,
719
+ headers={"Content-Type": "application/json"},
720
+ timeout=60
721
+ )
722
+
723
+ print(f"[LLM] 📥 Response status: {response.status_code}", flush=True)
724
+ print(f"[LLM] 📥 Response headers: {dict(response.headers)}", flush=True)
725
+
726
+ if response.status_code == 200:
727
+ try:
728
+ result = response.json()
729
+ print(f"[LLM] 📥 Response JSON: {result}", flush=True)
730
+ # Extract message from response
731
+ if isinstance(result, dict):
732
+ message = result.get("message", None)
733
+ if message:
734
+ print(f"[LLM] ✅ Got message from API (length: {len(message)})", flush=True)
735
+ return message
736
+ else:
737
+ print(f"[LLM] ⚠️ Response is not a dict: {type(result)}", flush=True)
738
+ return None
739
+ except ValueError as e:
740
+ print(f"[LLM] ❌ JSON decode error: {e}", flush=True)
741
+ print(f"[LLM] ❌ Response text: {response.text[:500]}", flush=True)
742
+ return None
743
+ elif response.status_code == 503:
744
+ # Service unavailable - model might be loading
745
+ print("[LLM] ⚠️ API service is loading, please wait...", flush=True)
746
+ return None
747
+ else:
748
+ print(f"[LLM] ❌ API error: {response.status_code} - {response.text[:500]}", flush=True)
749
+ return None
750
+ except requests.exceptions.Timeout:
751
+ print("[LLM] ❌ API request timeout")
752
+ return None
753
+ except requests.exceptions.ConnectionError as e:
754
+ print(f"[LLM] ❌ API connection error: {e}")
755
+ return None
756
+ except Exception as e:
757
+ error_trace = traceback.format_exc()
758
+ print(f"[LLM] ❌ API mode error: {e}", flush=True)
759
+ print(f"[LLM] ❌ Full trace: {error_trace}", flush=True)
760
+ logger.error(f"[LLM] ❌ API mode error: {e}\n{error_trace}")
761
+ return None
762
+
763
+ def summarize_context(self, messages: List[Dict[str, Any]], max_length: int = 200) -> str:
764
+ """
765
+ Summarize conversation context.
766
+
767
+ Args:
768
+ messages: List of conversation messages.
769
+ max_length: Maximum summary length.
770
+
771
+ Returns:
772
+ Summary string.
773
+ """
774
+ if not messages:
775
+ return ""
776
+
777
+ # Simple summarization: extract key entities and intents
778
+ intents = []
779
+ entities = set()
780
+
781
+ for msg in messages:
782
+ if msg.get("intent"):
783
+ intents.append(msg["intent"])
784
+ if msg.get("entities"):
785
+ for key, value in msg["entities"].items():
786
+ if isinstance(value, str):
787
+ entities.add(value)
788
+ elif isinstance(value, list):
789
+ entities.update(value)
790
+
791
+ summary_parts = []
792
+ if intents:
793
+ unique_intents = list(set(intents))
794
+ summary_parts.append(f"Chủ đề: {', '.join(unique_intents)}")
795
+ if entities:
796
+ summary_parts.append(f"Thông tin: {', '.join(list(entities)[:5])}")
797
+
798
+ summary = ". ".join(summary_parts)
799
+ return summary[:max_length] if len(summary) > max_length else summary
800
+
801
+ def extract_entities_llm(self, query: str) -> Dict[str, Any]:
802
+ """
803
+ Extract entities using LLM.
804
+
805
+ Args:
806
+ query: User query.
807
+
808
+ Returns:
809
+ Dictionary of extracted entities.
810
+ """
811
+ if not self.is_available():
812
+ return {}
813
+
814
+ prompt = f"""
815
+ Trích xuất các thực thể từ câu hỏi sau:
816
+ "{query}"
817
+
818
+ Các loại thực thể cần tìm:
819
+ - fine_code: Mã vi phạm (V001, V002, ...)
820
+ - fine_name: Tên vi phạm
821
+ - procedure_name: Tên thủ tục
822
+ - office_name: Tên đơn vị
823
+
824
+ Trả lời dưới dạng JSON: {{"fine_code": "...", "fine_name": "...", ...}}
825
+ Nếu không có, trả về {{}}.
826
+ """
827
+
828
+ try:
829
+ if self.provider == LLM_PROVIDER_OPENAI:
830
+ response = self._generate_openai(prompt)
831
+ elif self.provider == LLM_PROVIDER_ANTHROPIC:
832
+ response = self._generate_anthropic(prompt)
833
+ elif self.provider == LLM_PROVIDER_OLLAMA:
834
+ response = self._generate_ollama(prompt)
835
+ elif self.provider == LLM_PROVIDER_HUGGINGFACE:
836
+ response = self._generate_huggingface(prompt)
837
+ elif self.provider == LLM_PROVIDER_LOCAL:
838
+ response = self._generate_local(prompt)
839
+ elif self.provider == LLM_PROVIDER_API:
840
+ # For API mode, we can't extract entities directly
841
+ # Return empty dict
842
+ return {}
843
+ else:
844
+ return {}
845
+
846
+ if response:
847
+ # Try to extract JSON from response
848
+ json_match = re.search(r'\{[^}]+\}', response)
849
+ if json_match:
850
+ return json.loads(json_match.group())
851
+ except Exception as e:
852
+ print(f"Error extracting entities with LLM: {e}")
853
+
854
+ return {}
855
+
856
+
857
+ # Global LLM generator instance
858
+ _llm_generator: Optional[LLMGenerator] = None
859
+ _last_provider: Optional[str] = None
860
+
861
+ def get_llm_generator() -> Optional[LLMGenerator]:
862
+ """Get or create LLM generator instance.
863
+
864
+ Recreates instance if provider changed (e.g., from local to api).
865
+ """
866
+ global _llm_generator, _last_provider
867
+
868
+ # Get current provider from env
869
+ current_provider = os.environ.get("LLM_PROVIDER", LLM_PROVIDER_NONE).lower()
870
+
871
+ # Recreate if provider changed or instance doesn't exist
872
+ if _llm_generator is None or _last_provider != current_provider:
873
+ _llm_generator = LLMGenerator()
874
+ _last_provider = current_provider
875
+ print(f"[LLM] 🔄 Recreated LLM generator with provider: {current_provider}", flush=True)
876
+
877
+ return _llm_generator if _llm_generator.is_available() else None
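The backed-up variant of get_llm_generator() above re-reads LLM_PROVIDER on every call and rebuilds the generator when it changes. A small sketch of that behaviour, assuming this variant is the one installed as llm_integration.py and that mutating os.environ at runtime is acceptable in the deployment:

# Sketch: switching providers at runtime (values are examples only).
import os
from hue_portal.chatbot.llm_integration import get_llm_generator

os.environ["LLM_PROVIDER"] = "api"   # e.g. move from a local model to HF Spaces API mode
generator = get_llm_generator()      # the helper detects the change and recreates the instance
if generator:
    print(generator.provider)        # expected: "api"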
backend/hue_portal/chatbot/query_expansion.py ADDED
@@ -0,0 +1,228 @@
1
+ """
2
+ Query expansion and paraphrasing utilities for improving search recall.
3
+ """
4
+ import re
5
+ import unicodedata
6
+ from typing import List, Dict, Any, Optional, Set
7
+ from hue_portal.core.models import Synonym
8
+ from hue_portal.core.search_ml import expand_query_with_synonyms
9
+
10
+
11
+ def normalize_vietnamese_query(query: str) -> str:
12
+ """
13
+ Normalize Vietnamese text by handling diacritics variants.
14
+
15
+ Args:
16
+ query: Input query string.
17
+
18
+ Returns:
19
+ Normalized query string.
20
+ """
21
+ if not query:
22
+ return ""
23
+
24
+ # Remove extra spaces
25
+ query = re.sub(r'\s+', ' ', query.strip())
26
+
27
+ # Lowercase
28
+ query = query.lower()
29
+
30
+ return query
31
+
32
+
33
+ def extract_key_phrases(query: str) -> List[str]:
34
+ """
35
+ Extract key phrases from query.
36
+
37
+ Args:
38
+ query: Input query string.
39
+
40
+ Returns:
41
+ List of key phrases.
42
+ """
43
+ if not query:
44
+ return []
45
+
46
+ # Remove common stopwords
47
+ stopwords = {
48
+ "là", "gì", "bao nhiêu", "như thế nào", "ở đâu", "của", "và", "hoặc",
49
+ "tôi", "bạn", "có", "không", "được", "một", "các", "với", "cho"
50
+ }
51
+
52
+ # Split into words
53
+ words = re.findall(r'\b\w+\b', query.lower())
54
+
55
+ # Filter stopwords and short words
56
+ key_words = [w for w in words if w not in stopwords and len(w) > 2]
57
+
58
+ # Extract bigrams (2-word phrases)
59
+ phrases = []
60
+ for i in range(len(key_words) - 1):
61
+ phrase = f"{key_words[i]} {key_words[i+1]}"
62
+ phrases.append(phrase)
63
+
64
+ # Combine single words and phrases
65
+ all_phrases = key_words + phrases
66
+
67
+ return all_phrases
68
+
69
+
70
+ def expand_query_semantically(query: str, context: Optional[Dict[str, Any]] = None) -> List[str]:
71
+ """
72
+ Expand query with synonyms and related terms.
73
+
74
+ Args:
75
+ query: Original query string.
76
+ context: Optional context dictionary with entities, intents, etc.
77
+
78
+ Returns:
79
+ List of expanded query variations.
80
+ """
81
+ expanded = [query]
82
+
83
+ # Use existing synonym expansion
84
+ synonym_expanded = expand_query_with_synonyms(query)
85
+ expanded.extend(synonym_expanded)
86
+
87
+ # Add context-based expansions
88
+ if context:
89
+ entities = context.get("entities", {})
90
+
91
+ # If fine_code in context, add fine name variations
92
+ if "fine_code" in entities:
93
+ fine_code = entities["fine_code"]
94
+ # Could look up fine name from database and add variations
95
+ expanded.append(f"{query} {fine_code}")
96
+
97
+ # If procedure_name in context, add procedure variations
98
+ if "procedure_name" in entities:
99
+ procedure_name = entities["procedure_name"]
100
+ expanded.append(f"{query} {procedure_name}")
101
+
102
+ # Add common Vietnamese variations
103
+ variations = _get_vietnamese_variations(query)
104
+ expanded.extend(variations)
105
+
106
+ # Remove duplicates while preserving order
107
+ seen = set()
108
+ unique_expanded = []
109
+ for q in expanded:
110
+ q_normalized = normalize_vietnamese_query(q)
111
+ if q_normalized not in seen:
112
+ seen.add(q_normalized)
113
+ unique_expanded.append(q)
114
+
115
+ return unique_expanded
116
+
117
+
118
+ def _get_vietnamese_variations(query: str) -> List[str]:
119
+ """
120
+ Get common Vietnamese query variations.
121
+
122
+ Args:
123
+ query: Input query.
124
+
125
+ Returns:
126
+ List of variations.
127
+ """
128
+ variations = []
129
+ query_lower = query.lower()
130
+
131
+ # Common synonym mappings
132
+ synonym_map = {
133
+ "mức phạt": ["tiền phạt", "phạt", "xử phạt"],
134
+ "thủ tục": ["hồ sơ", "giấy tờ", "quy trình"],
135
+ "địa chỉ": ["nơi", "chỗ", "điểm"],
136
+ "số điện thoại": ["điện thoại", "số liên hệ", "hotline"],
137
+ "giờ làm việc": ["thời gian", "giờ", "lịch làm việc"],
138
+ "cảnh báo": ["thông báo", "lưu ý", "chú ý"],
139
+ "lừa đảo": ["scam", "gian lận", "lừa"],
140
+ }
141
+
142
+ for key, synonyms in synonym_map.items():
143
+ if key in query_lower:
144
+ for synonym in synonyms:
145
+ variation = query_lower.replace(key, synonym)
146
+ if variation != query_lower:
147
+ variations.append(variation)
148
+
149
+ return variations
150
+
151
+
152
+ def paraphrase_query(query: str) -> List[str]:
153
+ """
154
+ Generate paraphrases of the query to increase recall.
155
+
156
+ Args:
157
+ query: Original query string.
158
+
159
+ Returns:
160
+ List of paraphrased queries.
161
+ """
162
+ paraphrases = [query]
163
+ query_lower = query.lower()
164
+
165
+ # Common paraphrasing patterns for Vietnamese
166
+ patterns = [
167
+ # Question variations
168
+ (r"mức phạt (.+) là bao nhiêu", r"phạt \1 bao nhiêu tiền"),
169
+ (r"thủ tục (.+) cần gì", r"làm thủ tục \1 cần giấy tờ gì"),
170
+ (r"địa chỉ (.+) ở đâu", r"\1 ở đâu"),
171
+ (r"(.+) như thế nào", r"cách \1"),
172
+ ]
173
+
174
+ for pattern, replacement in patterns:
175
+ if re.search(pattern, query_lower):
176
+ paraphrase = re.sub(pattern, replacement, query_lower)
177
+ if paraphrase != query_lower:
178
+ paraphrases.append(paraphrase)
179
+
180
+ # Add question word variations
181
+ if "bao nhiêu" in query_lower:
182
+ paraphrases.append(query_lower.replace("bao nhiêu", "mức"))
183
+ paraphrases.append(query_lower.replace("bao nhiêu", "giá"))
184
+
185
+ if "như thế nào" in query_lower:
186
+ paraphrases.append(query_lower.replace("như thế nào", "cách"))
187
+ paraphrases.append(query_lower.replace("như thế nào", "quy trình"))
188
+
189
+ # Remove duplicates
190
+ return list(dict.fromkeys(paraphrases))
191
+
192
+
193
+ def enhance_query_with_context(query: str, context: Optional[Dict[str, Any]] = None) -> str:
194
+ """
195
+ Enhance query with context information.
196
+
197
+ Args:
198
+ query: Original query string.
199
+ context: Optional context dictionary.
200
+
201
+ Returns:
202
+ Enhanced query string.
203
+ """
204
+ if not context:
205
+ return query
206
+
207
+ enhanced_parts = [query]
208
+
209
+ # Add entities from context
210
+ entities = context.get("entities", {})
211
+ if "fine_code" in entities:
212
+ enhanced_parts.append(entities["fine_code"])
213
+ if "procedure_name" in entities:
214
+ enhanced_parts.append(entities["procedure_name"])
215
+ if "office_name" in entities:
216
+ enhanced_parts.append(entities["office_name"])
217
+
218
+ # Add intent-based keywords
219
+ intent = context.get("intent", "")
220
+ if intent == "search_fine":
221
+ enhanced_parts.append("mức phạt vi phạm")
222
+ elif intent == "search_procedure":
223
+ enhanced_parts.append("thủ tục hành chính")
224
+ elif intent == "search_office":
225
+ enhanced_parts.append("đơn vị công an")
226
+
227
+ return " ".join(enhanced_parts)
228
+
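A usage sketch for the expansion helpers above, assuming Django is configured so the synonym lookups can hit the database; the query and context values are placeholders:

from hue_portal.chatbot.query_expansion import (
    expand_query_semantically,
    paraphrase_query,
    enhance_query_with_context,
)

query = "mức phạt vượt đèn đỏ là bao nhiêu"
variants = expand_query_semantically(query)      # original + synonym + Vietnamese variations
variants += paraphrase_query(query)              # rule-based paraphrases ("bao nhiêu" -> "mức", ...)
enhanced = enhance_query_with_context(query, {"intent": "search_fine", "entities": {}})
print(len(set(variants)), enhanced)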
backend/hue_portal/chatbot/router.py ADDED
@@ -0,0 +1,165 @@
1
+ """
2
+ Routing utilities that decide whether a query should hit RAG or stay in small-talk.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import re
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum
9
+ from typing import Dict, Optional
10
+
11
+
12
+ class IntentRoute(str, Enum):
13
+ """High-level route for the chatbot pipeline."""
14
+
15
+ GREETING = "greeting"
16
+ SMALL_TALK = "small_talk"
17
+ SEARCH = "search"
18
+
19
+
20
+ DOCUMENT_CODE_PATTERNS = [
21
+ r"264[-\s]?QD[-\s]?TW",
22
+ r"QD[-\s]?69[-\s]?TW",
23
+ r"TT[-\s]?02[-\s]?CAND",
24
+ r"TT[-\s]?02[-\s]?BIEN[-\s]?SOAN",
25
+ r"QUYET[-\s]?DINH[-\s]?69",
26
+ r"QUYET[-\s]?DINH[-\s]?264",
27
+ r"THONG[-\s]?TU[-\s]?02",
28
+ ]
29
+
30
+ SMALL_TALK_PHRASES = [
31
+ "mệt quá",
32
+ "nhàm chán",
33
+ "tâm sự",
34
+ "chém gió",
35
+ "đang làm gì",
36
+ "chuyện trò",
37
+ "trò chuyện",
38
+ "hỏi chơi thôi",
39
+ ]
40
+
41
+
42
+ def _has_document_code(query: str) -> bool:
43
+ normalized = query.upper()
44
+ return any(re.search(pattern, normalized) for pattern in DOCUMENT_CODE_PATTERNS)
45
+
46
+
47
+ def _flag_keywords(query_lower: str) -> Dict[str, bool]:
48
+ return {
49
+ "greeting": any(
50
+ phrase in query_lower for phrase in ["xin chào", "xin chao", "chào", "chao", "hello", "hi"]
51
+ ),
52
+ "fine": any(
53
+ kw in query_lower
54
+ for kw in ["mức phạt", "phạt", "vi phạm", "đèn đỏ", "nồng độ cồn", "mũ bảo hiểm", "tốc độ"]
55
+ ),
56
+ "procedure": any(
57
+ kw in query_lower for kw in ["thủ tục", "thu tuc", "hồ sơ", "ho so", "điều kiện", "dieu kien", "cư trú", "cu tru"]
58
+ ),
59
+ "advisory": any(kw in query_lower for kw in ["cảnh báo", "lua dao", "lừa đảo", "scam", "mạo danh", "thủ đoạn"]),
60
+ "office": any(kw in query_lower for kw in ["địa chỉ", "dia chi", "công an", "cong an", "điểm tiếp dân", "số điện thoại"]),
61
+ "legal": any(
62
+ kw in query_lower
63
+ for kw in [
64
+ "quyết định",
65
+ "quyet dinh",
66
+ "thông tư",
67
+ "thong tu",
68
+ "nghị quyết",
69
+ "nghi quyet",
70
+ "nghị định",
71
+ "nghi dinh",
72
+ "luật",
73
+ "luat",
74
+ "điều ",
75
+ "dieu ",
76
+ "kỷ luật",
77
+ "qd 69",
78
+ "qd 264",
79
+ "thông tư 02",
80
+ "điều lệnh",
81
+ "văn bản pháp luật",
82
+ ]
83
+ ),
84
+ "small_talk": any(phrase in query_lower for phrase in SMALL_TALK_PHRASES),
85
+ }
86
+
87
+
88
+ @dataclass
89
+ class RouteDecision:
90
+ route: IntentRoute
91
+ intent: str
92
+ confidence: float
93
+ rationale: str
94
+ forced_intent: Optional[str] = None
95
+ keyword_flags: Dict[str, bool] = field(default_factory=dict)
96
+
97
+
98
+ def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
99
+ """
100
+ Decide how the chatbot should handle the query before invoking RAG.
101
+ """
102
+ query_lower = query.lower().strip()
103
+ words = query_lower.split()
104
+ keyword_flags = _flag_keywords(query_lower)
105
+ has_doc_code = _has_document_code(query_lower)
106
+
107
+ route = IntentRoute.SEARCH
108
+ rationale = "default-search"
109
+ forced_intent: Optional[str] = None
110
+
111
+ doc_code_override = False
112
+ if has_doc_code and intent != "search_legal":
113
+ forced_intent = "search_legal"
114
+ rationale = "doc-code-detected"
115
+ route = IntentRoute.SEARCH
116
+ doc_code_override = True
117
+
118
+ greeting_candidate = (
119
+ len(words) <= 3 and keyword_flags["greeting"] and not any(
120
+ keyword_flags[key] for key in ["fine", "procedure", "advisory", "office", "legal"]
121
+ )
122
+ )
123
+ if greeting_candidate and intent == "greeting" and not doc_code_override:
124
+ route = IntentRoute.GREETING
125
+ rationale = "simple-greeting"
126
+ forced_intent = "greeting"
127
+ elif (
128
+ not doc_code_override
129
+ and keyword_flags["small_talk"]
130
+ and not any(keyword_flags[key] for key in ["fine", "procedure", "advisory", "office", "legal"])
131
+ ):
132
+ route = IntentRoute.SMALL_TALK
133
+ rationale = "small-talk-keywords"
134
+ forced_intent = "general_query"
135
+ elif not doc_code_override and (intent == "general_query" or confidence < 0.55):
136
+ # Generic small talk / low confidence
137
+ route = IntentRoute.SMALL_TALK
138
+ rationale = "general-or-low-confidence"
139
+
140
+ if route != IntentRoute.GREETING and not doc_code_override:
141
+ keyword_force_map = [
142
+ ("legal", "search_legal"),
143
+ ("fine", "search_fine"),
144
+ ("procedure", "search_procedure"),
145
+ ("advisory", "search_advisory"),
146
+ ("office", "search_office"),
147
+ ]
148
+ for flag, target_intent in keyword_force_map:
149
+ if forced_intent:
150
+ break
151
+ if keyword_flags.get(flag) and intent != target_intent:
152
+ forced_intent = target_intent
153
+ route = IntentRoute.SEARCH
154
+ rationale = f"keyword-override-{flag}"
155
+ break
156
+
157
+ return RouteDecision(
158
+ route=route,
159
+ intent=intent,
160
+ confidence=confidence,
161
+ rationale=rationale,
162
+ forced_intent=forced_intent,
163
+ keyword_flags=keyword_flags,
164
+ )
165
+
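A quick sketch of decide_route() on a low-confidence classifier result; the fine-related keywords trigger the override noted in the comments:

from hue_portal.chatbot.router import decide_route, IntentRoute

decision = decide_route(
    "mức phạt vượt đèn đỏ là bao nhiêu?",
    intent="general_query",
    confidence=0.42,
)
print(decision.route)          # IntentRoute.SEARCH (fine keywords override small talk)
print(decision.forced_intent)  # "search_fine"
print(decision.rationale)      # "keyword-override-fine"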
backend/hue_portal/chatbot/schemas/legal_answer.rail ADDED
@@ -0,0 +1,63 @@
1
+ <rail version="0.2">
2
+ <output>
3
+ <object name="LegalAnswer">
4
+ <string name="summary" format="no_apology vietnamese_legal_summary" />
5
+ <list name="details" min_length="2">
6
+ <string format="vietnamese_bullet_with_citation" />
7
+ </list>
8
+ <list name="citations" min_length="1">
9
+ <object>
10
+ <string name="document_title" />
11
+ <string name="section_code" />
12
+ <string name="page_range" required="false" />
13
+ <string name="summary" format="short_summary" />
14
+ <string name="snippet" />
15
+ </object>
16
+ </list>
17
+ </object>
18
+ </output>
19
+
20
+ <prompt>
21
+ Bạn là chuyên gia tư vấn về xử lí kỷ luật cán bộ đảng viên của Phòng Thanh Tra - Công An Thành Phố Huế. Tổng hợp câu trả lời dựa trên các trích đoạn đã cung cấp.
22
+
23
+ Yêu cầu bắt buộc:
24
+ - Tất cả nội dung phải bằng tiếng Việt trang trọng, không xin lỗi hay né tránh.
25
+ - Phần summary phải nhắc rõ tên văn bản chính (ví dụ: Quyết định 69/QĐ-TW) và nêu kết luận 1-2 câu.
26
+ - Mỗi phần tử trong DETAILS là một bullet mô tả hình thức xử lý hoặc điều khoản, phải ghi rõ Điều/Khoản hoặc chương tương ứng.
27
+ - DETAILS phải ghi đúng tên văn bản có trong dữ liệu (ví dụ: Quyết định 69/QĐ-TW, Thông tư 02/CAND) và không bịa ra điều khoản khác.
28
+ - CITATIONS phải chứa ít nhất một mục, mỗi mục nêu rõ văn bản, điều khoản, trang và trích đoạn ≤500 ký tự.
29
+ - Nếu thiếu thông tin, ghi rõ trong summary nhưng vẫn tuân thủ định dạng.
30
+
31
+ $context
32
+ </prompt>
33
+
34
+ <output_format>
35
+ {{output}}
36
+ </output_format>
37
+
38
+ <instructions>
39
+ <list name="no_apology">
40
+ <string>Không chứa cụm xin lỗi (ví dụ: “xin lỗi”, “rất tiếc”).</string>
41
+ <string>Bắt buộc nhắc tên văn bản pháp luật.</string>
42
+ </list>
43
+
44
+ <list name="vietnamese_legal_summary">
45
+ <string>Viết tiếng Việt trang trọng, tối đa 2 câu.</string>
46
+ <string>Nhắc tên văn bản áp dụng.</string>
47
+ </list>
48
+
49
+ <list name="vietnamese_bullet_with_citation">
50
+ <string>Mỗi bullet bắt đầu bằng dấu “- ”.</string>
51
+ <string>Có cụm “Điều” hoặc “Khoản”.</string>
52
+ <string>Phải chứa tên văn bản pháp luật (ví dụ: “Quyết định 69/QĐ-TW”).</string>
53
+ <string>Chỉ sử dụng điều/khoản xuất hiện trong dữ liệu; nếu không rõ ghi “(không nêu điều cụ thể)”.</string>
54
+ <string>Không dùng tiếng Anh hoặc tiếng Trung.</string>
55
+ <string>Không phát minh hình thức kỷ luật hoặc điều luật mới.</string>
56
+ </list>
57
+
58
+ <list name="short_summary">
59
+ <string>Tối đa 2 câu.</string>
60
+ </list>
61
+ </instructions>
62
+ </rail>
63
+
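One way this .rail spec could be loaded is via the guardrails-ai package; the sketch below assumes the older Guard.from_rail API that matched rail v0.2 files (the library's interface has changed across releases), and llm_call is only a placeholder for the project's actual LLM callable:

import guardrails as gd

# Assumption: Guard.from_rail exists in the installed guardrails-ai version.
guard = gd.Guard.from_rail("backend/hue_portal/chatbot/schemas/legal_answer.rail")

def llm_call(prompt: str, **kwargs) -> str:
    # Placeholder: delegate to LLMGenerator.generate_answer() or another provider.
    raise NotImplementedError

# The validated output would follow the LegalAnswer schema above
# (summary, details, citations); the exact call signature depends on the library version:
# raw_output, validated_output, *_ = guard(llm_call, prompt_params={"context": "..."})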
backend/hue_portal/chatbot/slow_path_handler.py ADDED
@@ -0,0 +1,1392 @@
1
+ """
2
+ Slow Path Handler - Full RAG pipeline for complex queries.
3
+ """
4
+ import os
5
+ import time
6
+ import logging
7
+ import hashlib
8
+ from typing import Dict, Any, Optional, List, Set
9
+ import unicodedata
10
+ import re
11
+ from concurrent.futures import ThreadPoolExecutor, Future
12
+ import threading
13
+
14
+ from hue_portal.core.chatbot import get_chatbot, RESPONSE_TEMPLATES
15
+ from hue_portal.core.models import (
16
+ Fine,
17
+ Procedure,
18
+ Office,
19
+ Advisory,
20
+ LegalSection,
21
+ LegalDocument,
22
+ )
23
+ from hue_portal.core.search_ml import search_with_ml
24
+ from hue_portal.core.pure_semantic_search import pure_semantic_search
25
+ # Lazy import reranker to avoid blocking startup (FlagEmbedding may download model)
26
+ # from hue_portal.core.reranker import rerank_documents
27
+ from hue_portal.chatbot.llm_integration import get_llm_generator
28
+ from hue_portal.chatbot.structured_legal import format_structured_legal_answer
29
+ from hue_portal.chatbot.context_manager import ConversationContext
30
+ from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
31
+ from hue_portal.core.query_rewriter import get_query_rewriter
32
+ from hue_portal.core.pure_semantic_search import pure_semantic_search, parallel_vector_search
33
+ from hue_portal.core.redis_cache import get_redis_cache
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
+ class SlowPathHandler:
39
+ """Handle Slow Path queries with full RAG pipeline."""
40
+
41
+ def __init__(self):
42
+ self.chatbot = get_chatbot()
43
+ self.llm_generator = get_llm_generator()
44
+ # Thread pool for parallel search (max 2 workers to avoid overwhelming DB)
45
+ self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="parallel_search")
46
+ # Cache for prefetched results by session_id (in-memory fallback)
47
+ self._prefetched_cache: Dict[str, Dict[str, Any]] = {}
48
+ self._cache_lock = threading.Lock()
49
+ # Redis cache for prefetch results
50
+ self.redis_cache = get_redis_cache()
51
+ # Prefetch cache TTL (30 minutes default)
52
+ self.prefetch_cache_ttl = int(os.environ.get("CACHE_PREFETCH_TTL", "1800"))
53
+ # Toggle wizard flow (disable to answer directly)
54
+ self.disable_wizard_flow = os.environ.get("DISABLE_WIZARD_FLOW", "false").lower() == "true"
55
+
56
+ def handle(
57
+ self,
58
+ query: str,
59
+ intent: str,
60
+ session_id: Optional[str] = None,
61
+ selected_document_code: Optional[str] = None,
62
+ ) -> Dict[str, Any]:
63
+ """
64
+ Full RAG pipeline:
65
+ 1. Search (hybrid: BM25 + vector)
66
+ 2. Retrieve top 20 documents
67
+ 3. LLM generation with structured output (for legal queries)
68
+ 4. Guardrails validation
69
+ 5. Retry up to 3 times if needed
70
+
71
+ Args:
72
+ query: User query.
73
+ intent: Detected intent.
74
+ session_id: Optional session ID for context.
75
+ selected_document_code: Selected document code from wizard.
76
+
77
+ Returns:
78
+ Response dict with message, intent, results, etc.
79
+ """
80
+ query = query.strip()
81
+ selected_document_code_normalized = (
82
+ selected_document_code.strip().upper() if selected_document_code else None
83
+ )
84
+
85
+ # Handle greetings
86
+ if intent == "greeting":
87
+ query_lower = query.lower().strip()
88
+ query_words = query_lower.split()
89
+ is_simple_greeting = (
90
+ len(query_words) <= 3 and
91
+ any(greeting in query_lower for greeting in ["xin chào", "chào", "hello", "hi"]) and
92
+ not any(kw in query_lower for kw in ["phạt", "mức phạt", "vi phạm", "thủ tục", "hồ sơ", "địa chỉ", "công an", "cảnh báo"])
93
+ )
94
+ if is_simple_greeting:
95
+ return {
96
+ "message": RESPONSE_TEMPLATES["greeting"],
97
+ "intent": "greeting",
98
+ "results": [],
99
+ "count": 0,
100
+ "_source": "slow_path"
101
+ }
102
+
103
+ # Wizard / option-first for every general legal question:
104
+ # If:
105
+ # - the intent is search_legal
106
+ # - no selected_document_code is stored in the session
107
+ # - the query does not explicitly name a document code
108
+ # Then: always return an options payload so the user picks a document first,
109
+ # and do not generate the detailed answer yet.
110
+ has_explicit_code = self._has_explicit_document_code_in_query(query)
111
+ logger.info(
112
+ "[WIZARD] Checking wizard conditions - intent=%s, selected_code=%s, has_explicit_code=%s, query='%s'",
113
+ intent,
114
+ selected_document_code_normalized,
115
+ has_explicit_code,
116
+ query[:50],
117
+ )
118
+ if (
119
+ intent == "search_legal"
120
+ and not self.disable_wizard_flow
121
+ and not selected_document_code_normalized
122
+ and not has_explicit_code
123
+ ):
124
+ logger.info("[QUERY_REWRITE] ✅ Wizard conditions met, using Query Rewrite Strategy")
125
+
126
+ # Query Rewrite Strategy: Rewrite query into 3-5 optimized legal queries
127
+ query_rewriter = get_query_rewriter(self.llm_generator)
128
+
129
+ # Get conversation context for query rewriting
130
+ context = None
131
+ if session_id:
132
+ try:
133
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
134
+ context = [
135
+ {"role": msg.role, "content": msg.content}
136
+ for msg in recent_messages
137
+ ]
138
+ except Exception as exc:
139
+ logger.warning("[QUERY_REWRITE] Failed to load context: %s", exc)
140
+
141
+ # Rewrite query into 3-5 queries
142
+ rewritten_queries = query_rewriter.rewrite_query(
143
+ query,
144
+ context=context,
145
+ max_queries=5,
146
+ min_queries=3
147
+ )
148
+
149
+ if not rewritten_queries:
150
+ # Fallback to original query if rewrite fails
151
+ rewritten_queries = [query]
152
+
153
+ logger.info(
154
+ "[QUERY_REWRITE] Rewrote query into %d queries: %s",
155
+ len(rewritten_queries),
156
+ rewritten_queries[:3]
157
+ )
158
+
159
+ # Parallel vector search with multiple queries
160
+ try:
161
+ from hue_portal.core.models import LegalSection
162
+
163
+ # Search all legal sections (no document filter yet)
164
+ qs = LegalSection.objects.all()
165
+ text_fields = ["section_title", "section_code", "content"]
166
+
167
+ # Use parallel vector search
168
+ search_results = parallel_vector_search(
169
+ rewritten_queries,
170
+ qs,
171
+ top_k_per_query=5,
172
+ final_top_k=7,
173
+ text_fields=text_fields
174
+ )
175
+
176
+ # Extract unique document codes from results
177
+ doc_codes_seen: Set[str] = set()
178
+ document_options: List[Dict[str, Any]] = []
179
+
180
+ for section, score in search_results:
181
+ doc = getattr(section, "document", None)
182
+ if not doc:
183
+ continue
184
+
185
+ doc_code = getattr(doc, "code", "").upper()
186
+ if not doc_code or doc_code in doc_codes_seen:
187
+ continue
188
+
189
+ doc_codes_seen.add(doc_code)
190
+
191
+ # Get document metadata
192
+ doc_title = getattr(doc, "title", "") or doc_code
193
+ doc_summary = getattr(doc, "summary", "") or ""
194
+ if not doc_summary:
195
+ metadata = getattr(doc, "metadata", {}) or {}
196
+ if isinstance(metadata, dict):
197
+ doc_summary = metadata.get("summary", "")
198
+
199
+ document_options.append({
200
+ "code": doc_code,
201
+ "title": doc_title,
202
+ "summary": doc_summary,
203
+ "score": float(score),
204
+ "doc_type": getattr(doc, "doc_type", "") or "",
205
+ })
206
+
207
+ # Limit to top 5 documents
208
+ if len(document_options) >= 5:
209
+ break
210
+
211
+ # If no documents found, use canonical fallback
212
+ if not document_options:
213
+ logger.warning("[QUERY_REWRITE] No documents found, using canonical fallback")
214
+ canonical_candidates = [
215
+ {
216
+ "code": "264-QD-TW",
217
+ "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
218
+ "summary": "",
219
+ "doc_type": "",
220
+ },
221
+ {
222
+ "code": "QD-69-TW",
223
+ "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
224
+ "summary": "",
225
+ "doc_type": "",
226
+ },
227
+ {
228
+ "code": "TT-02-CAND",
229
+ "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
230
+ "summary": "",
231
+ "doc_type": "",
232
+ },
233
+ ]
234
+ clarification_payload = self._build_clarification_payload(
235
+ query, canonical_candidates
236
+ )
237
+ if clarification_payload:
238
+ clarification_payload.setdefault("intent", intent)
239
+ clarification_payload.setdefault("_source", "clarification")
240
+ clarification_payload.setdefault("routing", "clarification")
241
+ clarification_payload.setdefault("confidence", 0.3)
242
+ return clarification_payload
243
+
244
+ # Build options from search results
245
+ options = [
246
+ {
247
+ "code": opt["code"],
248
+ "title": opt["title"],
249
+ "reason": opt.get("summary") or f"Độ liên quan: {opt['score']:.2f}",
250
+ }
251
+ for opt in document_options
252
+ ]
253
+
254
+ # Add "Khác" option
255
+ if not any(opt.get("code") == "__other__" for opt in options):
256
+ options.append({
257
+ "code": "__other__",
258
+ "title": "Khác",
259
+ "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
260
+ })
261
+
262
+ message = (
263
+ "Tôi đã tìm thấy các văn bản pháp luật liên quan đến câu hỏi của bạn.\n\n"
264
+ "Bạn hãy chọn văn bản muốn tra cứu để tôi trả lời chi tiết hơn:"
265
+ )
266
+
267
+ logger.info(
268
+ "[QUERY_REWRITE] ✅ Found %d documents using Query Rewrite Strategy",
269
+ len(document_options)
270
+ )
271
+
272
+ return {
273
+ "type": "options",
274
+ "wizard_stage": "choose_document",
275
+ "message": message,
276
+ "options": options,
277
+ "clarification": {
278
+ "message": message,
279
+ "options": options,
280
+ },
281
+ "results": [],
282
+ "count": 0,
283
+ "intent": intent,
284
+ "_source": "query_rewrite",
285
+ "routing": "query_rewrite",
286
+ "confidence": 0.95, # High confidence with Query Rewrite Strategy
287
+ }
288
+
289
+ except Exception as exc:
290
+ logger.error(
291
+ "[QUERY_REWRITE] Error in Query Rewrite Strategy: %s, falling back to LLM suggestions",
292
+ exc,
293
+ exc_info=True
294
+ )
295
+ # Fallback to original LLM-based clarification
296
+ canonical_candidates: List[Dict[str, Any]] = []
297
+ try:
298
+ canonical_docs = list(
299
+ LegalDocument.objects.filter(
300
+ code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
301
+ )
302
+ )
303
+ for doc in canonical_docs:
304
+ summary = getattr(doc, "summary", "") or ""
305
+ metadata = getattr(doc, "metadata", {}) or {}
306
+ if not summary and isinstance(metadata, dict):
307
+ summary = metadata.get("summary", "")
308
+ canonical_candidates.append(
309
+ {
310
+ "code": doc.code,
311
+ "title": getattr(doc, "title", "") or doc.code,
312
+ "summary": summary,
313
+ "doc_type": getattr(doc, "doc_type", "") or "",
314
+ "section_title": "",
315
+ }
316
+ )
317
+ except Exception as e:
318
+ logger.warning("[CLARIFICATION] Canonical documents lookup failed: %s", e)
319
+
320
+ if not canonical_candidates:
321
+ canonical_candidates = [
322
+ {
323
+ "code": "264-QD-TW",
324
+ "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
325
+ "summary": "",
326
+ "doc_type": "",
327
+ "section_title": "",
328
+ },
329
+ {
330
+ "code": "QD-69-TW",
331
+ "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
332
+ "summary": "",
333
+ "doc_type": "",
334
+ "section_title": "",
335
+ },
336
+ {
337
+ "code": "TT-02-CAND",
338
+ "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
339
+ "summary": "",
340
+ "doc_type": "",
341
+ "section_title": "",
342
+ },
343
+ ]
344
+
345
+ clarification_payload = self._build_clarification_payload(
346
+ query, canonical_candidates
347
+ )
348
+ if clarification_payload:
349
+ clarification_payload.setdefault("intent", intent)
350
+ clarification_payload.setdefault("_source", "clarification_fallback")
351
+ clarification_payload.setdefault("routing", "clarification")
352
+ clarification_payload.setdefault("confidence", 0.3)
353
+ return clarification_payload
354
+
355
+ # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
356
+ search_result = self._search_by_intent(
357
+ intent,
358
+ query,
359
+ limit=15,
360
+ preferred_document_code=selected_document_code_normalized,
361
+ ) # Balance: 15 for good recall, not too slow
362
+
363
+ # Fast path for high-confidence legal queries (skip for complex queries)
364
+ fast_path_response = None
365
+ if intent == "search_legal" and not self._is_complex_query(query):
366
+ fast_path_response = self._maybe_fast_path_response(search_result["results"], query)
367
+ if fast_path_response:
368
+ fast_path_response["intent"] = intent
369
+ fast_path_response["_source"] = "fast_path"
370
+ return fast_path_response
371
+
372
+ # Rerank results - DISABLED for speed (can enable via ENABLE_RERANKER env var)
373
+ # Reranker adds 1-3 seconds delay, skip for faster responses
374
+ enable_reranker = os.environ.get("ENABLE_RERANKER", "false").lower() == "true"
375
+ if intent == "search_legal" and enable_reranker:
376
+ try:
377
+ # Lazy import to avoid blocking startup (FlagEmbedding may download model)
378
+ from hue_portal.core.reranker import rerank_documents
379
+
380
+ legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
381
+ if len(legal_results) > 0:
382
+ # Rerank to top-4 (balance speed and context quality)
383
+ top_k = min(4, len(legal_results))
384
+ reranked = rerank_documents(query, legal_results, top_k=top_k)
385
+ # Update search_result with reranked results (keep non-legal results)
386
+ non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
387
+ search_result["results"] = reranked + non_legal
388
+ search_result["count"] = len(search_result["results"])
389
+ logger.info(
390
+ "[RERANKER] Reranked %d legal results to top-%d for query: %s",
391
+ len(legal_results),
392
+ top_k,
393
+ query[:50]
394
+ )
395
+ except Exception as e:
396
+ logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
397
+ elif intent == "search_legal":
398
+ # Skip reranking for speed - just use top results by score
399
+ logger.debug("[RERANKER] Skipped reranking for speed (ENABLE_RERANKER=false)")
400
+
401
+         # STEP 1: Bypass the LLM when we already have good results (avoids context overflow + 30-40% speedup)
402
+         # Only applied to legal queries whose results have a high score
403
+ if intent == "search_legal" and search_result["count"] > 0:
404
+ top_result = search_result["results"][0]
405
+ top_score = top_result.get("score", 0.0) or 0.0
406
+ top_data = top_result.get("data", {})
407
+ doc_code = (top_data.get("document_code") or "").upper()
408
+ content = top_data.get("content", "") or top_data.get("excerpt", "")
409
+
410
+             # Bypass the LLM if:
411
+             # 1. A document code is present (TT-02-CAND, etc.) and the content is long enough
412
+             # 2. Score >= 0.4 (lowered threshold so the bypass triggers more easily)
413
+             # 3. Or important keywords are present (%, rank demotion, emulation rating, ratio) with score >= 0.3
414
+ should_bypass = False
415
+ query_lower = query.lower()
416
+ has_keywords = any(kw in query_lower for kw in ["%", "phần trăm", "tỷ lệ", "12%", "20%", "10%", "hạ bậc", "thi đua", "xếp loại", "vi phạm", "cán bộ"])
417
+
418
+             # Relaxed bypass condition: doc_code present + content long enough + reasonable score
419
+ if doc_code and len(content) > 100:
420
+ if top_score >= 0.4:
421
+ should_bypass = True
422
+ elif has_keywords and top_score >= 0.3:
423
+ should_bypass = True
424
+             # Or important keywords present + content long enough
425
+ elif has_keywords and len(content) > 100 and top_score >= 0.3:
426
+ should_bypass = True
427
+
428
+ if should_bypass:
429
+                 # Direct template answer for queries about violation ratios + emulation-rank demotion
430
+ if any(kw in query_lower for kw in ["12%", "tỷ lệ", "phần trăm", "hạ bậc", "thi đua"]):
431
+                     # Query about violation percentage and emulation-rank demotion
432
+ section_code = top_data.get("section_code", "")
433
+ section_title = top_data.get("section_title", "")
434
+ doc_title = top_data.get("document_title", "văn bản pháp luật")
435
+
436
+                     # Extract the relevant passage from the content
437
+ content_preview = content[:600] + "..." if len(content) > 600 else content
438
+
439
+ answer = (
440
+ f"Theo {doc_title} ({doc_code}):\n\n"
441
+ f"{section_code}: {section_title}\n\n"
442
+ f"{content_preview}\n\n"
443
+ f"Nguồn: {section_code}, {doc_title} ({doc_code})"
444
+ )
445
+ else:
446
+                     # Generic template for legal queries
447
+ section_code = top_data.get("section_code", "Điều liên quan")
448
+ section_title = top_data.get("section_title", "")
449
+ doc_title = top_data.get("document_title", "văn bản pháp luật")
450
+ content_preview = content[:500] + "..." if len(content) > 500 else content
451
+
452
+ answer = (
453
+ f"Kết quả chính xác nhất:\n\n"
454
+ f"- Văn bản: {doc_title} ({doc_code})\n"
455
+ f"- Điều khoản: {section_code}" + (f" – {section_title}" if section_title else "") + "\n\n"
456
+ f"{content_preview}\n\n"
457
+ f"Nguồn: {section_code}, {doc_title} ({doc_code})"
458
+ )
459
+
460
+ logger.info(
461
+ "[BYPASS_LLM] Using raw template for legal query (score=%.3f, doc=%s, query='%s')",
462
+ top_score,
463
+ doc_code,
464
+ query[:50]
465
+ )
466
+
467
+ return {
468
+ "message": answer,
469
+ "intent": intent,
470
+ "confidence": min(0.99, top_score + 0.05),
471
+ "results": search_result["results"][:3],
472
+ "count": min(3, search_result["count"]),
473
+ "_source": "raw_template",
474
+ "routing": "raw_template"
475
+ }
476
+
477
+ # Get conversation context if available
478
+ context = None
479
+ context_summary = ""
480
+ if session_id:
481
+ try:
482
+ recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
483
+ context = [
484
+ {
485
+ "role": msg.role,
486
+ "content": msg.content,
487
+ "intent": msg.intent
488
+ }
489
+ for msg in recent_messages
490
+ ]
491
+                 # Build a context summary to feed into the prompt when conversation history exists
492
+ if len(context) > 1:
493
+ context_parts = []
494
+                     for msg in reversed(context[-3:]):  # Only use the 3 most recent messages
495
+ if msg["role"] == "user":
496
+ context_parts.append(f"Người dùng: {msg['content'][:100]}")
497
+ elif msg["role"] == "bot":
498
+ context_parts.append(f"Bot: {msg['content'][:100]}")
499
+ if context_parts:
500
+ context_summary = "\n\nNgữ cảnh cuộc trò chuyện trước đó:\n" + "\n".join(context_parts)
501
+ except Exception as exc:
502
+ logger.warning("[CONTEXT] Failed to load conversation context: %s", exc)
503
+
504
+ # Enhance query with context if available
505
+ enhanced_query = query
506
+ if context_summary:
507
+ enhanced_query = query + context_summary
508
+
509
+ # Generate response message using LLM if available and we have documents
510
+ message = None
511
+ if self.llm_generator and search_result["count"] > 0:
512
+ # For legal queries, use structured output (top-4 for good context and speed)
513
+ if intent == "search_legal" and search_result["results"]:
514
+ legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:4] # Top-4 for balance
515
+ if legal_docs:
516
+ structured_answer = self.llm_generator.generate_structured_legal_answer(
517
+                         enhanced_query,  # Use enhanced_query, which carries the conversation context
518
+ legal_docs,
519
+ prefill_summary=None
520
+ )
521
+ if structured_answer:
522
+ message = format_structured_legal_answer(structured_answer)
523
+
524
+ # For other intents or if structured failed, use regular LLM generation
525
+ if not message:
526
+ documents = [r["data"] for r in search_result["results"][:4]] # Top-4 for balance
527
+ message = self.llm_generator.generate_answer(
528
+                 enhanced_query,  # Use enhanced_query, which carries the conversation context
529
+ context=context,
530
+ documents=documents
531
+ )
532
+
533
+ # Fallback to template if LLM not available or failed
534
+ if not message:
535
+ if search_result["count"] > 0:
536
+             # Special-case legal queries: format the top hit nicely instead of using the generic template
537
+ if intent == "search_legal" and search_result["results"]:
538
+ top_result = search_result["results"][0]
539
+ top_data = top_result.get("data", {})
540
+ doc_code = top_data.get("document_code", "")
541
+ doc_title = top_data.get("document_title", "văn bản pháp luật")
542
+ section_code = top_data.get("section_code", "")
543
+ section_title = top_data.get("section_title", "")
544
+ content = top_data.get("content", "") or top_data.get("excerpt", "")
545
+
546
+ if content and len(content) > 50:
547
+ content_preview = content[:400] + "..." if len(content) > 400 else content
548
+ message = (
549
+ f"Tôi tìm thấy {search_result['count']} điều khoản liên quan đến '{query}':\n\n"
550
+ f"**{section_code}**: {section_title or 'Nội dung liên quan'}\n\n"
551
+ f"{content_preview}\n\n"
552
+ f"Nguồn: {doc_title}" + (f" ({doc_code})" if doc_code else "")
553
+ )
554
+ else:
555
+ template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
556
+ message = template.format(
557
+ count=search_result["count"],
558
+ query=query
559
+ )
560
+ else:
561
+ template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
562
+ message = template.format(
563
+ count=search_result["count"],
564
+ query=query
565
+ )
566
+ else:
567
+ message = RESPONSE_TEMPLATES["no_results"].format(query=query)
568
+
569
+ # Limit results to top 5 for response
570
+ results = search_result["results"][:5]
571
+
572
+ response = {
573
+ "message": message,
574
+ "intent": intent,
575
+ "confidence": 0.95, # High confidence for Slow Path (thorough search)
576
+ "results": results,
577
+ "count": len(results),
578
+ "_source": "slow_path"
579
+ }
580
+
581
+ return response
582
+
583
+ def _maybe_request_clarification(
584
+ self,
585
+ query: str,
586
+ search_result: Dict[str, Any],
587
+ selected_document_code: Optional[str] = None,
588
+ ) -> Optional[Dict[str, Any]]:
589
+ """
590
+         Decide whether to ask the user to pick a document (wizard step: choose_document).
591
+
592
+         Option-first principle:
593
+         - If the user has NOT yet selected a document in this session
594
+         - And the query does NOT spell out a document code
595
+         - And the search returned results
596
+         => Prefer returning a list of documents for the user to choose from, rather than answering directly.
597
+ """
598
+ if selected_document_code:
599
+ return None
600
+ if not search_result or search_result.get("count", 0) == 0:
601
+ return None
602
+
603
+         # If the user already spelled out a document code in the query (e.g. 264/QĐ-TW),
604
+         # there is no need to ask again - prefer using that exact code.
605
+ if self._has_explicit_document_code_in_query(query):
606
+ return None
607
+
608
+         # Prefer the canonical list of documents if they exist in the DB.
609
+         # However, to keep the option-first wizard working in all cases,
610
+         # fall back to building a static list when the DB lacks the data.
611
+ fallback_candidates: List[Dict[str, Any]] = []
612
+ try:
613
+ fallback_docs = list(
614
+ LegalDocument.objects.filter(
615
+ code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
616
+ )
617
+ )
618
+ for doc in fallback_docs:
619
+ summary = getattr(doc, "summary", "") or ""
620
+ metadata = getattr(doc, "metadata", {}) or {}
621
+ if not summary and isinstance(metadata, dict):
622
+ summary = metadata.get("summary", "")
623
+ fallback_candidates.append(
624
+ {
625
+ "code": doc.code,
626
+ "title": getattr(doc, "title", "") or doc.code,
627
+ "summary": summary,
628
+ "doc_type": getattr(doc, "doc_type", "") or "",
629
+ "section_title": "",
630
+ }
631
+ )
632
+ except Exception as exc:
633
+ logger.warning(
634
+ "[CLARIFICATION] Fallback documents lookup failed, using static list: %s",
635
+ exc,
636
+ )
637
+
638
+         # If the DB does not have enough information, always provide a minimal static list
639
+         # so the option-first wizard keeps working.
640
+ if not fallback_candidates:
641
+ fallback_candidates = [
642
+ {
643
+ "code": "264-QD-TW",
644
+ "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
645
+ "summary": "",
646
+ "doc_type": "",
647
+ "section_title": "",
648
+ },
649
+ {
650
+ "code": "QD-69-TW",
651
+ "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
652
+ "summary": "",
653
+ "doc_type": "",
654
+ "section_title": "",
655
+ },
656
+ {
657
+ "code": "TT-02-CAND",
658
+ "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
659
+ "summary": "",
660
+ "doc_type": "",
661
+ "section_title": "",
662
+ },
663
+ ]
664
+
665
+ payload = self._build_clarification_payload(query, fallback_candidates)
666
+ if payload:
667
+ logger.info(
668
+ "[CLARIFICATION] Requesting user choice among canonical documents: %s",
669
+ [c["code"] for c in fallback_candidates],
670
+ )
671
+ return payload
672
+
673
+ def _has_explicit_document_code_in_query(self, query: str) -> bool:
674
+ """
675
+ Check if the raw query string explicitly contains a known document code
676
+ pattern (e.g. '264/QĐ-TW', 'QD-69-TW', 'TT-02-CAND').
677
+
678
+         Unlike _detect_document_code (which scans the whole LegalDocument table by token),
679
+         this helper relies only on fixed regexes, to avoid over-detecting codes in generic
680
+         questions such as 'xử lí kỷ luật đảng viên thế nào'.
681
+ """
682
+ normalized = self._remove_accents(query).upper()
683
+ if not normalized:
684
+ return False
685
+ for pattern in DOCUMENT_CODE_PATTERNS:
686
+ try:
687
+ if re.search(pattern, normalized):
688
+ return True
689
+ except re.error:
690
+                 # Skip invalid patterns instead of blocking the flow
691
+ continue
692
+ return False
693
+
694
+ def _collect_document_candidates(
695
+ self,
696
+ legal_results: List[Dict[str, Any]],
697
+ limit: int = 4,
698
+ ) -> List[Dict[str, Any]]:
699
+ """Collect unique document candidates from legal results."""
700
+ ordered_codes: List[str] = []
701
+ seen: set[str] = set()
702
+ for result in legal_results:
703
+ data = result.get("data", {})
704
+ code = (data.get("document_code") or "").strip()
705
+ if not code:
706
+ continue
707
+ upper = code.upper()
708
+ if upper in seen:
709
+ continue
710
+ ordered_codes.append(code)
711
+ seen.add(upper)
712
+ if len(ordered_codes) >= limit:
713
+ break
714
+ if len(ordered_codes) < 2:
715
+ return []
716
+ try:
717
+ documents = {
718
+ doc.code.upper(): doc
719
+ for doc in LegalDocument.objects.filter(code__in=ordered_codes)
720
+ }
721
+ except Exception as exc:
722
+ logger.warning("[CLARIFICATION] Unable to load documents for candidates: %s", exc)
723
+ documents = {}
724
+ candidates: List[Dict[str, Any]] = []
725
+ for code in ordered_codes:
726
+ upper = code.upper()
727
+ doc_obj = documents.get(upper)
728
+ section = next(
729
+ (
730
+ res
731
+ for res in legal_results
732
+ if (res.get("data", {}).get("document_code") or "").strip().upper() == upper
733
+ ),
734
+ None,
735
+ )
736
+ data = section.get("data", {}) if section else {}
737
+ summary = ""
738
+ if doc_obj:
739
+ summary = doc_obj.summary or ""
740
+ if not summary and isinstance(doc_obj.metadata, dict):
741
+ summary = doc_obj.metadata.get("summary", "")
742
+ if not summary:
743
+ summary = data.get("excerpt") or data.get("content", "")[:200]
744
+ candidates.append(
745
+ {
746
+ "code": code,
747
+ "title": data.get("document_title") or (doc_obj.title if doc_obj else code),
748
+ "summary": summary,
749
+ "doc_type": doc_obj.doc_type if doc_obj else "",
750
+ "section_title": data.get("section_title") or "",
751
+ }
752
+ )
753
+ return candidates
754
+
755
+ def _build_clarification_payload(
756
+ self,
757
+ query: str,
758
+ candidates: List[Dict[str, Any]],
759
+ ) -> Optional[Dict[str, Any]]:
760
+ if not candidates:
761
+ return None
762
+ default_message = (
763
+ "Tôi tìm thấy một số văn bản có thể phù hợp. "
764
+ "Bạn vui lòng chọn văn bản muốn tra cứu để tôi trả lời chính xác hơn."
765
+ )
766
+ llm_payload = self._call_clarification_llm(query, candidates)
767
+ message = default_message
768
+ options: List[Dict[str, Any]] = []
769
+
770
+         # Prefer the LLM suggestions, but always make sure fallback options exist
771
+ if llm_payload:
772
+ message = llm_payload.get("message") or default_message
773
+ raw_options = llm_payload.get("options")
774
+ if isinstance(raw_options, list):
775
+ options = [
776
+ {
777
+ "code": (opt.get("code") or candidate.get("code", "")).upper(),
778
+ "title": opt.get("title") or opt.get("document_title") or candidate.get("title", ""),
779
+ "reason": opt.get("reason")
780
+ or opt.get("summary")
781
+ or candidate.get("summary")
782
+ or candidate.get("section_title")
783
+ or "",
784
+ }
785
+ for opt, candidate in zip(
786
+ raw_options,
787
+ candidates[: len(raw_options)],
788
+ )
789
+ if (opt.get("code") or candidate.get("code"))
790
+ and (opt.get("title") or opt.get("document_title") or candidate.get("title"))
791
+ ]
792
+
793
+         # If the LLM did not return valid options, fall back to building them from the candidates
794
+ if not options:
795
+ options = [
796
+ {
797
+ "code": candidate["code"].upper(),
798
+ "title": candidate["title"],
799
+ "reason": candidate.get("summary") or candidate.get("section_title") or "",
800
+ }
801
+ for candidate in candidates[:3]
802
+ ]
803
+ if not any(opt.get("code") == "__other__" for opt in options):
804
+ options.append(
805
+ {
806
+ "code": "__other__",
807
+ "title": "Khác",
808
+ "reason": "Tôi muốn hỏi văn bản hoặc chủ đề khác",
809
+ }
810
+ )
811
+ return {
812
+             # Wizard-style payload: prefer the options shape for the UI
813
+ "type": "options",
814
+ "wizard_stage": "choose_document",
815
+ "message": message,
816
+ "options": options,
817
+ "clarification": {
818
+ "message": message,
819
+ "options": options,
820
+ },
821
+ "results": [],
822
+ "count": 0,
823
+ }
824
+
825
+ def _call_clarification_llm(
826
+ self,
827
+ query: str,
828
+ candidates: List[Dict[str, Any]],
829
+ ) -> Optional[Dict[str, Any]]:
830
+ if not self.llm_generator:
831
+ return None
832
+ try:
833
+ return self.llm_generator.suggest_clarification_topics(
834
+ query,
835
+ candidates,
836
+ max_options=3,
837
+ )
838
+ except Exception as exc:
839
+ logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
840
+ return None
841
+
842
+ def _parallel_search_prepare(
843
+ self,
844
+ document_code: str,
845
+ keywords: List[str],
846
+ session_id: Optional[str] = None,
847
+ ) -> None:
848
+ """
849
+ Trigger parallel search in background when user selects a document option.
850
+ Stores results in cache for Stage 2 (choose topic).
851
+
852
+ Args:
853
+ document_code: Selected document code
854
+ keywords: Keywords extracted from query/options
855
+ session_id: Session ID for caching results
856
+ """
857
+ if not session_id:
858
+ return
859
+
860
+ def _search_task():
861
+ try:
862
+ logger.info(
863
+ "[PARALLEL_SEARCH] Starting background search for doc=%s, keywords=%s",
864
+ document_code,
865
+ keywords[:5],
866
+ )
867
+
868
+ # Check Redis cache first
869
+ cache_key = f"prefetch:{document_code.upper()}:{hashlib.sha256(' '.join(keywords).encode()).hexdigest()[:16]}"
870
+ cached_result = None
871
+ if self.redis_cache and self.redis_cache.is_available():
872
+ cached_result = self.redis_cache.get(cache_key)
873
+ if cached_result:
874
+ logger.info(
875
+ "[PARALLEL_SEARCH] ✅ Cache hit for doc=%s",
876
+ document_code
877
+ )
878
+ # Store in in-memory cache too
879
+ with self._cache_lock:
880
+ if session_id not in self._prefetched_cache:
881
+ self._prefetched_cache[session_id] = {}
882
+ self._prefetched_cache[session_id]["document_results"] = cached_result
883
+ return
884
+
885
+ # Search in the selected document
886
+ query_text = " ".join(keywords) if keywords else ""
887
+ search_result = self._search_by_intent(
888
+ intent="search_legal",
889
+ query=query_text,
890
+ limit=20, # Get more results for topic options
891
+ preferred_document_code=document_code.upper(),
892
+ )
893
+
894
+ # Prepare cache data
895
+ cache_data = {
896
+ "document_code": document_code,
897
+ "results": search_result.get("results", []),
898
+ "count": search_result.get("count", 0),
899
+ "timestamp": time.time(),
900
+ }
901
+
902
+ # Store in Redis cache
903
+ if self.redis_cache and self.redis_cache.is_available():
904
+ self.redis_cache.set(cache_key, cache_data, ttl_seconds=self.prefetch_cache_ttl)
905
+ logger.debug(
906
+ "[PARALLEL_SEARCH] Cached prefetch results (TTL: %ds)",
907
+ self.prefetch_cache_ttl
908
+ )
909
+
910
+ # Store in in-memory cache (fallback)
911
+ with self._cache_lock:
912
+ if session_id not in self._prefetched_cache:
913
+ self._prefetched_cache[session_id] = {}
914
+ self._prefetched_cache[session_id]["document_results"] = cache_data
915
+
916
+ logger.info(
917
+ "[PARALLEL_SEARCH] Completed background search for doc=%s, found %d results",
918
+ document_code,
919
+ search_result.get("count", 0),
920
+ )
921
+ except Exception as exc:
922
+ logger.warning("[PARALLEL_SEARCH] Background search failed: %s", exc)
923
+
924
+ # Submit to thread pool
925
+ self._executor.submit(_search_task)
926
+
927
+ def _parallel_search_topic(
928
+ self,
929
+ document_code: str,
930
+ topic_keywords: List[str],
931
+ session_id: Optional[str] = None,
932
+ ) -> None:
933
+ """
934
+ Trigger parallel search when user selects a topic option.
935
+ Stores results for final answer generation.
936
+
937
+ Args:
938
+ document_code: Selected document code
939
+ topic_keywords: Keywords from selected topic
940
+ session_id: Session ID for caching results
941
+ """
942
+ if not session_id:
943
+ return
944
+
945
+ def _search_task():
946
+ try:
947
+ logger.info(
948
+ "[PARALLEL_SEARCH] Starting topic search for doc=%s, keywords=%s",
949
+ document_code,
950
+ topic_keywords[:5],
951
+ )
952
+
953
+ # Search with topic keywords
954
+ query_text = " ".join(topic_keywords) if topic_keywords else ""
955
+ search_result = self._search_by_intent(
956
+ intent="search_legal",
957
+ query=query_text,
958
+ limit=10,
959
+ preferred_document_code=document_code.upper(),
960
+ )
961
+
962
+ # Store in cache
963
+ with self._cache_lock:
964
+ if session_id not in self._prefetched_cache:
965
+ self._prefetched_cache[session_id] = {}
966
+ self._prefetched_cache[session_id]["topic_results"] = {
967
+ "document_code": document_code,
968
+ "keywords": topic_keywords,
969
+ "results": search_result.get("results", []),
970
+ "count": search_result.get("count", 0),
971
+ "timestamp": time.time(),
972
+ }
973
+
974
+ logger.info(
975
+ "[PARALLEL_SEARCH] Completed topic search, found %d results",
976
+ search_result.get("count", 0),
977
+ )
978
+ except Exception as exc:
979
+ logger.warning("[PARALLEL_SEARCH] Topic search failed: %s", exc)
980
+
981
+ # Submit to thread pool
982
+ self._executor.submit(_search_task)
983
+
984
+ def _get_prefetched_results(
985
+ self,
986
+ session_id: Optional[str],
987
+ result_type: str = "document_results",
988
+ ) -> Optional[Dict[str, Any]]:
989
+ """
990
+ Get prefetched search results from cache.
991
+
992
+ Args:
993
+ session_id: Session ID
994
+ result_type: "document_results" or "topic_results"
995
+
996
+ Returns:
997
+ Cached results dict or None
998
+ """
999
+ if not session_id:
1000
+ return None
1001
+
1002
+ with self._cache_lock:
1003
+ cache_entry = self._prefetched_cache.get(session_id)
1004
+ if not cache_entry:
1005
+ return None
1006
+
1007
+ results = cache_entry.get(result_type)
1008
+ if not results:
1009
+ return None
1010
+
1011
+ # Check if results are still fresh (within 5 minutes)
1012
+ timestamp = results.get("timestamp", 0)
1013
+ if time.time() - timestamp > 300: # 5 minutes
1014
+ logger.debug("[PARALLEL_SEARCH] Prefetched results expired for session=%s", session_id)
1015
+ return None
1016
+
1017
+ return results
1018
+
1019
+ def _clear_prefetched_cache(self, session_id: Optional[str]) -> None:
1020
+ """Clear prefetched cache for a session."""
1021
+ if not session_id:
1022
+ return
1023
+
1024
+ with self._cache_lock:
1025
+ if session_id in self._prefetched_cache:
1026
+ del self._prefetched_cache[session_id]
1027
+ logger.debug("[PARALLEL_SEARCH] Cleared cache for session=%s", session_id)
1028
+
1029
+ def _search_by_intent(
1030
+ self,
1031
+ intent: str,
1032
+ query: str,
1033
+ limit: int = 5,
1034
+ preferred_document_code: Optional[str] = None,
1035
+ ) -> Dict[str, Any]:
1036
+         """Search based on classified intent. The default limit is kept small (5) for faster inference on the free tier; callers pass a larger limit (e.g. 15) when reranking."""
1037
+ # Use original query for better matching
1038
+ keywords = query.strip()
1039
+ extracted = " ".join(self.chatbot.extract_keywords(query))
1040
+ if extracted and len(extracted) > 2:
1041
+ keywords = f"{keywords} {extracted}"
1042
+
1043
+ results = []
1044
+
1045
+ if intent == "search_fine":
1046
+ qs = Fine.objects.all()
1047
+ text_fields = ["name", "code", "article", "decree", "remedial"]
1048
+ search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
1049
+ results = [{"type": "fine", "data": {
1050
+ "id": f.id,
1051
+ "name": f.name,
1052
+ "code": f.code,
1053
+ "min_fine": float(f.min_fine) if f.min_fine else None,
1054
+ "max_fine": float(f.max_fine) if f.max_fine else None,
1055
+ "article": f.article,
1056
+ "decree": f.decree,
1057
+ }} for f in search_results]
1058
+
1059
+ elif intent == "search_procedure":
1060
+ qs = Procedure.objects.all()
1061
+ text_fields = ["title", "domain", "conditions", "dossier"]
1062
+ search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
1063
+ results = [{"type": "procedure", "data": {
1064
+ "id": p.id,
1065
+ "title": p.title,
1066
+ "domain": p.domain,
1067
+ "level": p.level,
1068
+ }} for p in search_results]
1069
+
1070
+ elif intent == "search_office":
1071
+ qs = Office.objects.all()
1072
+ text_fields = ["unit_name", "address", "district", "service_scope"]
1073
+ search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
1074
+ results = [{"type": "office", "data": {
1075
+ "id": o.id,
1076
+ "unit_name": o.unit_name,
1077
+ "address": o.address,
1078
+ "district": o.district,
1079
+ "phone": o.phone,
1080
+ "working_hours": o.working_hours,
1081
+ }} for o in search_results]
1082
+
1083
+ elif intent == "search_advisory":
1084
+ qs = Advisory.objects.all()
1085
+ text_fields = ["title", "summary"]
1086
+ search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
1087
+ results = [{"type": "advisory", "data": {
1088
+ "id": a.id,
1089
+ "title": a.title,
1090
+ "summary": a.summary,
1091
+ }} for a in search_results]
1092
+
1093
+ elif intent == "search_legal":
1094
+ qs = LegalSection.objects.all()
1095
+ text_fields = ["section_title", "section_code", "content"]
1096
+ detected_code = self._detect_document_code(query)
1097
+ effective_code = preferred_document_code or detected_code
1098
+ filtered = False
1099
+ if effective_code:
1100
+ filtered_qs = qs.filter(document__code__iexact=effective_code)
1101
+ if filtered_qs.exists():
1102
+ qs = filtered_qs
1103
+ filtered = True
1104
+ logger.info(
1105
+ "[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
1106
+ effective_code,
1107
+ query,
1108
+ )
1109
+ else:
1110
+ logger.info(
1111
+ "[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
1112
+ effective_code,
1113
+ )
1114
+ else:
1115
+ logger.debug("[SEARCH] No document code detected for query: %s", query)
1116
+ # Use pure semantic search (100% vector, no BM25)
1117
+ search_results = pure_semantic_search(
1118
+ [keywords],
1119
+ qs,
1120
+ top_k=limit, # limit=15 for reranking, will be reduced to 4
1121
+ text_fields=text_fields
1122
+ )
1123
+ results = self._format_legal_results(search_results, detected_code, query=query)
1124
+ logger.info(
1125
+ "[SEARCH] Legal intent processed (query='%s', code=%s, filtered=%s, results=%d)",
1126
+ query,
1127
+ detected_code or "None",
1128
+ filtered,
1129
+ len(results),
1130
+ )
1131
+
1132
+ return {
1133
+ "intent": intent,
1134
+ "query": query,
1135
+ "keywords": keywords,
1136
+ "results": results,
1137
+ "count": len(results),
1138
+ "detected_code": detected_code,
1139
+ }
1140
+
1141
+ def _should_save_to_golden(self, query: str, response: Dict) -> bool:
1142
+ """
1143
+ Decide if response should be saved to golden dataset.
1144
+
1145
+ Criteria:
1146
+ - High confidence (>0.95)
1147
+ - Has results
1148
+ - Response is complete and well-formed
1149
+ - Not already in golden dataset
1150
+ """
1151
+ try:
1152
+ from hue_portal.core.models import GoldenQuery
1153
+
1154
+ # Check if already exists
1155
+ query_normalized = self._normalize_query(query)
1156
+ if GoldenQuery.objects.filter(query_normalized=query_normalized, is_active=True).exists():
1157
+ return False
1158
+
1159
+ # Check criteria
1160
+ has_results = response.get("count", 0) > 0
1161
+ has_message = bool(response.get("message", "").strip())
1162
+ confidence = response.get("confidence", 0.0)
1163
+
1164
+ # Only save if high quality
1165
+ if has_results and has_message and confidence >= 0.95:
1166
+ # Additional check: message should be substantial (not just template)
1167
+ message = response.get("message", "")
1168
+ if len(message) > 50: # Substantial response
1169
+ return True
1170
+
1171
+ return False
1172
+ except Exception as e:
1173
+ logger.warning(f"Error checking if should save to golden: {e}")
1174
+ return False
1175
+
1176
+ def _normalize_query(self, query: str) -> str:
1177
+ """Normalize query for matching."""
1178
+ normalized = query.lower().strip()
1179
+ # Remove accents
1180
+ normalized = unicodedata.normalize("NFD", normalized)
1181
+ normalized = "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
1182
+ # Remove extra spaces
1183
+ normalized = re.sub(r'\s+', ' ', normalized).strip()
1184
+ return normalized
1185
+
1186
+ def _detect_document_code(self, query: str) -> Optional[str]:
1187
+ """Detect known document code mentioned in the query."""
1188
+ normalized_query = self._remove_accents(query).upper()
1189
+ if not normalized_query:
1190
+ return None
1191
+ try:
1192
+ codes = LegalDocument.objects.values_list("code", flat=True)
1193
+ except Exception as exc:
1194
+ logger.debug("Unable to fetch document codes: %s", exc)
1195
+ return None
1196
+
1197
+ for code in codes:
1198
+ if not code:
1199
+ continue
1200
+ tokens = self._split_code_tokens(code)
1201
+ if tokens and all(token in normalized_query for token in tokens):
1202
+ logger.info("[SEARCH] Detected document code %s in query", code)
1203
+ return code
1204
+ return None
1205
+
1206
+ def _split_code_tokens(self, code: str) -> List[str]:
1207
+ """Split a document code into uppercase accentless tokens."""
1208
+ normalized = self._remove_accents(code).upper()
1209
+ return [tok for tok in re.split(r"[-/\s]+", normalized) if tok]
1210
+
1211
+ def _remove_accents(self, text: str) -> str:
1212
+ if not text:
1213
+ return ""
1214
+ normalized = unicodedata.normalize("NFD", text)
1215
+ return "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
1216
+
1217
+ def _format_legal_results(
1218
+ self,
1219
+ search_results: List[Any],
1220
+ detected_code: Optional[str],
1221
+ query: Optional[str] = None,
1222
+ ) -> List[Dict[str, Any]]:
1223
+ """Build legal result payload and apply ordering/boosting based on doc code and keywords."""
1224
+ entries: List[Dict[str, Any]] = []
1225
+ upper_detected = detected_code.upper() if detected_code else None
1226
+
1227
+ # Keywords that indicate important legal concepts (boost score if found)
1228
+ important_keywords = []
1229
+ if query:
1230
+ query_lower = query.lower()
1231
+ # Keywords for percentage/threshold queries
1232
+ if any(kw in query_lower for kw in ["%", "phần trăm", "tỷ lệ", "12%", "20%", "10%"]):
1233
+ important_keywords.extend(["%", "phần trăm", "tỷ lệ", "12", "20", "10"])
1234
+ # Keywords for ranking/demotion queries
1235
+ if any(kw in query_lower for kw in ["hạ bậc", "thi đua", "xếp loại", "đánh giá"]):
1236
+ important_keywords.extend(["hạ bậc", "thi đua", "xếp loại", "đánh giá"])
1237
+
1238
+ for ls in search_results:
1239
+ doc = ls.document
1240
+ doc_code = doc.code if doc else None
1241
+ score = getattr(ls, "_ml_score", getattr(ls, "rank", 0.0)) or 0.0
1242
+
1243
+ # Boost score if content contains important keywords
1244
+ content_text = (ls.content or ls.section_title or "").lower()
1245
+ keyword_boost = 0.0
1246
+ if important_keywords and content_text:
1247
+ for kw in important_keywords:
1248
+ if kw.lower() in content_text:
1249
+ keyword_boost += 0.15 # Boost 0.15 per keyword match
1250
+ logger.debug(
1251
+ "[BOOST] Keyword '%s' found in section %s, boosting score",
1252
+ kw,
1253
+ ls.section_code,
1254
+ )
1255
+
1256
+ entries.append(
1257
+ {
1258
+ "type": "legal",
1259
+ "score": float(score) + keyword_boost,
1260
+ "data": {
1261
+ "id": ls.id,
1262
+ "section_code": ls.section_code,
1263
+ "section_title": ls.section_title,
1264
+ "content": ls.content[:500] if ls.content else "",
1265
+ "excerpt": ls.excerpt,
1266
+ "document_code": doc_code,
1267
+ "document_title": doc.title if doc else None,
1268
+ "page_start": ls.page_start,
1269
+ "page_end": ls.page_end,
1270
+ },
1271
+ }
1272
+ )
1273
+
1274
+ if upper_detected:
1275
+ exact_matches = [
1276
+ r for r in entries if (r["data"].get("document_code") or "").upper() == upper_detected
1277
+ ]
1278
+ if exact_matches:
1279
+ others = [r for r in entries if r not in exact_matches]
1280
+ entries = exact_matches + others
1281
+ else:
1282
+ for entry in entries:
1283
+ doc_code = (entry["data"].get("document_code") or "").upper()
1284
+ if doc_code == upper_detected:
1285
+ entry["score"] = (entry.get("score") or 0.1) * 10
1286
+ entries.sort(key=lambda r: r.get("score") or 0, reverse=True)
1287
+ else:
1288
+ # Sort by boosted score
1289
+ entries.sort(key=lambda r: r.get("score") or 0, reverse=True)
1290
+ return entries
1291
+
1292
+ def _is_complex_query(self, query: str) -> bool:
1293
+ """
1294
+ Detect if query is complex and requires LLM reasoning (not suitable for Fast Path).
1295
+
1296
+ Complex queries contain keywords like: %, bậc, thi đua, tỷ lệ, liên đới, tăng nặng, giảm nhẹ, đơn vị vi phạm
1297
+ """
1298
+ if not query:
1299
+ return False
1300
+ query_lower = query.lower()
1301
+ complex_keywords = [
1302
+ "%", "phần trăm",
1303
+ "bậc", "hạ bậc", "nâng bậc",
1304
+ "thi đua", "xếp loại", "đánh giá",
1305
+ "tỷ lệ", "tỉ lệ",
1306
+ "liên đới", "liên quan",
1307
+ "tăng nặng", "tăng nặng hình phạt",
1308
+ "giảm nhẹ", "giảm nhẹ hình phạt",
1309
+ "đơn vị vi phạm", "đơn vị có",
1310
+ ]
1311
+ for keyword in complex_keywords:
1312
+ if keyword in query_lower:
1313
+ logger.info(
1314
+ "[FAST_PATH] Complex query detected (keyword: '%s'), forcing Slow Path",
1315
+ keyword,
1316
+ )
1317
+ return True
1318
+ return False
1319
+
1320
+ def _maybe_fast_path_response(
1321
+ self, results: List[Dict[str, Any]], query: Optional[str] = None
1322
+ ) -> Optional[Dict[str, Any]]:
1323
+ """Return fast-path response if results are confident enough."""
1324
+ if not results:
1325
+ return None
1326
+
1327
+ # Double-check: if query is complex, never use Fast Path
1328
+ if query and self._is_complex_query(query):
1329
+ return None
1330
+ top_result = results[0]
1331
+ top_score = top_result.get("score", 0.0) or 0.0
1332
+ doc_code = (top_result.get("data", {}).get("document_code") or "").upper()
1333
+
1334
+ if top_score >= 0.88 and doc_code:
1335
+ logger.info(
1336
+ "[FAST_PATH] Top score hit (%.3f) for document %s", top_score, doc_code
1337
+ )
1338
+ message = self._format_fast_legal_message(top_result)
1339
+ return {
1340
+ "message": message,
1341
+ "results": results[:3],
1342
+ "count": min(3, len(results)),
1343
+ "confidence": min(0.99, top_score + 0.05),
1344
+ }
1345
+
1346
+ top_three = results[:3]
1347
+ if len(top_three) >= 2:
1348
+ doc_codes = [
1349
+ (res.get("data", {}).get("document_code") or "").upper()
1350
+ for res in top_three
1351
+ if res.get("data", {}).get("document_code")
1352
+ ]
1353
+ if doc_codes and len(set(doc_codes)) == 1:
1354
+ logger.info(
1355
+ "[FAST_PATH] Top-%d results share same document %s",
1356
+ len(top_three),
1357
+ doc_codes[0],
1358
+ )
1359
+ message = self._format_fast_legal_message(top_three[0])
1360
+ return {
1361
+ "message": message,
1362
+ "results": top_three,
1363
+ "count": len(top_three),
1364
+ "confidence": min(0.97, (top_three[0].get("score") or 0.9) + 0.04),
1365
+ }
1366
+ return None
1367
+
1368
+ def _format_fast_legal_message(self, result: Dict[str, Any]) -> str:
1369
+ """Format a concise legal answer without LLM."""
1370
+ data = result.get("data", {})
1371
+ doc_title = data.get("document_title") or "văn bản pháp luật"
1372
+ doc_code = data.get("document_code") or ""
1373
+ section_code = data.get("section_code") or "Điều liên quan"
1374
+ section_title = data.get("section_title") or ""
1375
+ content = (data.get("content") or data.get("excerpt") or "").strip()
1376
+ if len(content) > 400:
1377
+ trimmed = content[:400].rsplit(" ", 1)[0]
1378
+ content = f"{trimmed}..."
1379
+ intro = "Kết quả chính xác nhất:"
1380
+ lines = [intro]
1381
+ if doc_title or doc_code:
1382
+ lines.append(f"- Văn bản: {doc_title or 'văn bản pháp luật'}" + (f" ({doc_code})" if doc_code else ""))
1383
+ section_label = section_code
1384
+ if section_title:
1385
+ section_label = f"{section_code} – {section_title}"
1386
+ lines.append(f"- Điều khoản: {section_label}")
1387
+ lines.append("")
1388
+ lines.append(content)
1389
+ citation_doc = doc_title or doc_code or "nguồn chính thức"
1390
+ lines.append(f"\nNguồn: {section_label}, {citation_doc}.")
1391
+ return "\n".join(lines)
1392
+
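
For orientation, a minimal usage sketch of the handler added above (not part of the commit; the query text and session id are illustrative, and it assumes Django settings and the chatbot models are already initialized):

# Illustrative sketch - not part of this diff.
from hue_portal.chatbot.slow_path_handler import SlowPathHandler

handler = SlowPathHandler()

# A general legal question with no explicit document code and no prior selection:
# per the wizard / option-first logic in handle(), the expected response is an
# options payload (type="options", wizard_stage="choose_document") rather than
# a full answer.
response = handler.handle(
    query="Đảng viên vi phạm kỷ luật thì bị xử lý như thế nào?",  # illustrative query
    intent="search_legal",
    session_id="demo-session",          # hypothetical session id
    selected_document_code=None,
)
print(response.get("type"), response.get("wizard_stage"), response.get("count"))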
backend/hue_portal/chatbot/structured_legal.py ADDED
@@ -0,0 +1,276 @@
1
+ """
2
+ Structured legal answer helpers using LangChain output parsers.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ import logging
9
+ import textwrap
10
+ from functools import lru_cache
11
+ from typing import List, Optional, Sequence
12
+
13
+ from langchain.output_parsers import PydanticOutputParser
14
+ from langchain.schema import OutputParserException
15
+ from pydantic import BaseModel, Field
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class LegalCitation(BaseModel):
21
+ """Single citation item pointing back to a legal document."""
22
+
23
+ document_title: str = Field(..., description="Tên văn bản pháp luật.")
24
+ section_code: str = Field(..., description="Mã điều/khoản được trích dẫn.")
25
+ page_range: Optional[str] = Field(
26
+ None, description="Trang hoặc khoảng trang trong tài liệu."
27
+ )
28
+ summary: str = Field(
29
+ ...,
30
+ description="1-2 câu mô tả nội dung chính của trích dẫn, phải liên quan trực tiếp câu hỏi.",
31
+ )
32
+ snippet: str = Field(
33
+ ..., description="Trích đoạn ngắn gọn (≤500 ký tự) lấy từ tài liệu gốc."
34
+ )
35
+
36
+
37
+ class LegalAnswer(BaseModel):
38
+ """Structured answer returned by the LLM."""
39
+
40
+ summary: str = Field(
41
+ ...,
42
+ description="Đoạn mở đầu tóm tắt kết luận chính, phải nhắc văn bản áp dụng (ví dụ Quyết định 69/QĐ-TW).",
43
+ )
44
+ details: List[str] = Field(
45
+ ...,
46
+ description="Tối thiểu 2 gạch đầu dòng mô tả từng hình thức/điều khoản. Mỗi gạch đầu dòng phải nhắc mã điều hoặc tên văn bản.",
47
+ )
48
+ citations: List[LegalCitation] = Field(
49
+ ...,
50
+ description="Danh sách trích dẫn; phải có ít nhất 1 phần tử tương ứng với các tài liệu đã cung cấp.",
51
+ )
52
+
53
+
54
+ @lru_cache(maxsize=1)
55
+ def get_legal_output_parser() -> PydanticOutputParser:
56
+ """Return cached parser to enforce structured output."""
57
+
58
+ return PydanticOutputParser(pydantic_object=LegalAnswer)
59
+
60
+
61
+ def build_structured_legal_prompt(
62
+ query: str,
63
+ documents: Sequence,
64
+ parser: PydanticOutputParser,
65
+ prefill_summary: Optional[str] = None,
66
+ retry_hint: Optional[str] = None,
67
+ ) -> str:
68
+ """Construct prompt instructing the LLM to return structured JSON."""
69
+
70
+ doc_blocks = []
71
+ # 4 chunks for good context and speed balance
72
+ for idx, doc in enumerate(documents[:4], 1):
73
+ document = getattr(doc, "document", None)
74
+ title = getattr(document, "title", "") or "Không rõ tên văn bản"
75
+ code = getattr(document, "code", "") or "N/A"
76
+ section_code = getattr(doc, "section_code", "") or "Không rõ điều"
77
+ section_title = getattr(doc, "section_title", "") or ""
78
+ page_range = _format_page_range(doc)
79
+ content = getattr(doc, "content", "") or ""
80
+ # Increased snippet to 500 chars to use more RAM and provide better context
81
+ snippet = (content[:500] + "...") if len(content) > 500 else content
82
+
83
+ block = textwrap.dedent(
84
+ f"""
85
+ TÀI LIỆU #{idx}
86
+ Văn bản: {title} (Mã: {code})
87
+ Điều/khoản: {section_code} - {section_title}
88
+ Trang: {page_range or 'Không rõ'}
89
+ Trích đoạn:
90
+ {snippet}
91
+ """
92
+ ).strip()
93
+ doc_blocks.append(block)
94
+
95
+ docs_text = "\n\n".join(doc_blocks)
96
+ reference_lines = []
97
+ title_section_pairs = []
98
+ # 4 chunks to match doc_blocks for balance
99
+ for doc in documents[:4]:
100
+ document = getattr(doc, "document", None)
101
+ title = getattr(document, "title", "") or "Không rõ tên văn bản"
102
+ section_code = getattr(doc, "section_code", "") or "Không rõ điều"
103
+ reference_lines.append(f"- {title} | {section_code}")
104
+ title_section_pairs.append((title, section_code))
105
+ reference_text = "\n".join(reference_lines)
106
+ prefill_block = ""
107
+ if prefill_summary:
108
+ prefill_block = textwrap.dedent(
109
+ f"""
110
+ Bản tóm tắt tiếng Việt đã có sẵn (hãy dùng lại, diễn đạt ngắn gọn hơn, KHÔNG thêm thông tin mới):
111
+ {prefill_summary.strip()}
112
+ """
113
+ ).strip()
114
+ format_instructions = parser.get_format_instructions()
115
+ retry_hint_block = ""
116
+ if retry_hint:
117
+ retry_hint_block = textwrap.dedent(
118
+ f"""
119
+ Nhắc lại: {retry_hint.strip()}
120
+ """
121
+ ).strip()
122
+
123
+ prompt = textwrap.dedent(
124
+ f"""
125
+ Bạn là chuyên gia tư vấn về xử lí kỷ luật cán bộ đảng viên của Phòng Thanh Tra - Công An Thành Phố Huế. Chỉ trả lời dựa trên context được cung cấp, không suy diễn hay tạo thông tin mới.
126
+
127
+ Câu hỏi: {query}
128
+
129
+ Context được sắp xếp theo độ liên quan giảm dần (tài liệu #1 là liên quan nhất):
130
+ {docs_text}
131
+
132
+             Bảng tham chiếu (chỉ sử dụng đúng tên/mã dưới đây):
133
+ {reference_text}
134
+
135
+ Quy tắc bắt buộc:
136
+ 1. CHỈ trả lời dựa trên thông tin trong context ở trên, không tự tạo hoặc suy đoán.
137
+ 2. Phải nhắc rõ văn bản (ví dụ: Thông tư 02 về xử lý điều lệnh trong CAND) và mã điều/khoản chính xác (ví dụ: Điều 7, Điều 8).
138
+ 3. Nếu câu hỏi về tỷ lệ phần trăm, hạ bậc thi đua, xếp loại → phải tìm đúng điều khoản quy định về tỷ lệ đó.
139
+ 4. Nếu KHÔNG tìm thấy thông tin về tỷ lệ %, hạ bậc thi đua trong context → trả lời rõ: "Thông tư 02 không quy định xử lý đơn vị theo tỷ lệ phần trăm vi phạm trong năm" (đừng trích bừa điều khoản khác).
140
+ 5. Cấu trúc trả lời:
141
+ - SUMMARY: Tóm tắt ngắn gọn kết luận chính, nhắc văn bản và điều khoản áp dụng
142
+ - DETAILS: Tối thiểu 2 bullet, mỗi bullet phải có mã điều/khoản và nội dung cụ thể
143
+ - CITATIONS: Danh sách trích dẫn với document_title, section_code, snippet ≤500 ký tự
144
+ 6. Tuyệt đối không chép lại schema hay thêm khóa "$defs"; chỉ xuất đối tượng JSON cuối cùng.
145
+ 7. Chỉ in ra CHÍNH XÁC một JSON object, không thêm chữ 'json', không dùng ``` hoặc văn bản thừa.
146
+
147
+ Ví dụ định dạng:
148
+ {{
149
+ "summary": "Theo Thông tư 02 về xử lý điều lệnh trong CAND, đơn vị có 12% cán bộ vi phạm điều lệnh trong năm sẽ bị hạ 1 bậc thi đua (Điều 7).",
150
+ "details": [
151
+ "- Điều 7 quy định: Đơn vị có từ 10% đến dưới 20% cán bộ vi phạm điều lệnh trong năm sẽ bị hạ 1 bậc thi đua.",
152
+ "- Điều 8 quy định: Đơn vị có từ 20% trở lên cán bộ vi phạm điều lệnh trong năm sẽ bị hạ 2 bậc thi đua."
153
+ ],
154
+ "citations": [
155
+ {{
156
+ "document_title": "Thông tư 02 về xử lý điều lệnh trong CAND",
157
+ "section_code": "Điều 7",
158
+ "page_range": "5-6",
159
+ "summary": "Quy định về hạ bậc thi đua theo tỷ lệ vi phạm",
160
+ "snippet": "Đơn vị có từ 10% đến dưới 20% cán bộ vi phạm điều lệnh trong năm sẽ bị hạ 1 bậc thi đua..."
161
+ }}
162
+ ]
163
+ }}
164
+
165
+ {prefill_block}
166
+
167
+ {retry_hint_block}
168
+
169
+ {format_instructions}
170
+ """
171
+ ).strip()
172
+
173
+ return prompt
174
+
175
+
176
+ def format_structured_legal_answer(answer: LegalAnswer) -> str:
177
+ """Convert structured answer into human-friendly text with citations."""
178
+
179
+ lines: List[str] = []
180
+ if answer.summary:
181
+ lines.append(answer.summary.strip())
182
+
183
+ if answer.details:
184
+ lines.append("")
185
+ lines.append("Chi tiết chính:")
186
+ for bullet in answer.details:
187
+ lines.append(f"- {bullet.strip()}")
188
+
189
+ if answer.citations:
190
+ lines.append("")
191
+ lines.append("Trích dẫn chi tiết:")
192
+ for idx, citation in enumerate(answer.citations, 1):
193
+ page_text = f" (Trang: {citation.page_range})" if citation.page_range else ""
194
+ lines.append(
195
+ f"{idx}. {citation.document_title} – {citation.section_code}{page_text}"
196
+ )
197
+ lines.append(f" Tóm tắt: {citation.summary.strip()}")
198
+ lines.append(f" Trích đoạn: {citation.snippet.strip()}")
199
+
200
+ return "\n".join(lines).strip()
201
+
202
+
203
+ def _format_page_range(doc: object) -> Optional[str]:
204
+ start = getattr(doc, "page_start", None)
205
+ end = getattr(doc, "page_end", None)
206
+ if start and end:
207
+ if start == end:
208
+ return str(start)
209
+ return f"{start}-{end}"
210
+ if start:
211
+ return str(start)
212
+ if end:
213
+ return str(end)
214
+ return None
215
+
216
+
217
+ def parse_structured_output(
218
+ parser: PydanticOutputParser, raw_output: str
219
+ ) -> Optional[LegalAnswer]:
220
+ """Parse raw LLM output to LegalAnswer if possible."""
221
+
222
+ if not raw_output:
223
+ return None
224
+ try:
225
+ return parser.parse(raw_output)
226
+ except OutputParserException:
227
+ snippet = raw_output.strip().replace("\n", " ")
228
+ logger.warning(
229
+ "[LLM] Structured parse failed. Preview: %s",
230
+ snippet[:400],
231
+ )
232
+ json_candidate = _extract_json_block(raw_output)
233
+ if json_candidate:
234
+ try:
235
+ return parser.parse(json_candidate)
236
+ except OutputParserException:
237
+ logger.warning("[LLM] JSON reparse also failed.")
238
+ return None
239
+ return None
240
+
241
+
242
+ def _extract_json_block(text: str) -> Optional[str]:
243
+ """
244
+ Best-effort extraction of the first JSON object within text.
245
+ """
246
+ stripped = text.strip()
247
+ if stripped.startswith("```"):
248
+ stripped = stripped.lstrip("`")
249
+ if stripped.lower().startswith("json"):
250
+ stripped = stripped[4:]
251
+ stripped = stripped.strip("`").strip()
252
+
253
+ start = text.find("{")
254
+ if start == -1:
255
+ return None
256
+
257
+ stack = 0
258
+ for idx in range(start, len(text)):
259
+ char = text[idx]
260
+ if char == "{":
261
+ stack += 1
262
+ elif char == "}":
263
+ stack -= 1
264
+ if stack == 0:
265
+ payload = text[start : idx + 1]
266
+ # Remove code fences if present
267
+ payload = payload.strip()
268
+ if payload.startswith("```"):
269
+ payload = payload.strip("`").strip()
270
+ try:
271
+ json.loads(payload)
272
+ return payload
273
+ except json.JSONDecodeError:
274
+ return None
275
+ return None
276
+
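
For orientation, a minimal sketch of the structured-output round trip these helpers support (not part of the commit; generate() is a placeholder for whatever call llm_integration.py exposes, and sections stands for LegalSection-like objects retrieved earlier):

# Illustrative sketch - not part of this diff.
from hue_portal.chatbot.structured_legal import (
    build_structured_legal_prompt,
    format_structured_legal_answer,
    get_legal_output_parser,
    parse_structured_output,
)

query = "Đơn vị có 12% cán bộ vi phạm điều lệnh thì xử lý thế nào?"  # illustrative query

parser = get_legal_output_parser()
prompt = build_structured_legal_prompt(query, sections, parser)  # sections: retrieved LegalSection objects
raw_output = generate(prompt)          # placeholder LLM call, not defined in this diff
answer = parse_structured_output(parser, raw_output)
if answer is not None:
    print(format_structured_legal_answer(answer))
else:
    print("Structured parse failed; caller should retry or fall back to a template.")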
backend/hue_portal/chatbot/tests/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Test suite for chatbot module."""
backend/hue_portal/chatbot/tests/__pycache__/test_smoke.cpython-310.pyc ADDED
Binary file (1.71 kB).
 
backend/hue_portal/chatbot/tests/test_intent_keywords.py ADDED
@@ -0,0 +1,29 @@
1
+ import unittest
2
+
3
+ from hue_portal.chatbot.chatbot import Chatbot
4
+
5
+
6
+ class IntentKeywordTests(unittest.TestCase):
7
+ @classmethod
8
+ def setUpClass(cls):
9
+ cls.bot = Chatbot()
10
+
11
+ def test_office_keywords_have_priority(self):
12
+ intent, confidence = self.bot.classify_intent("Cho mình địa chỉ Công an phường An Cựu", context=None)
13
+ self.assertEqual(intent, "search_office")
14
+ self.assertGreaterEqual(confidence, 0.7)
15
+
16
+ def test_document_code_forces_search_legal(self):
17
+ intent, confidence = self.bot.classify_intent("Quyết định 69 quy định gì về kỷ luật?", context=None)
18
+ self.assertEqual(intent, "search_legal")
19
+ self.assertGreaterEqual(confidence, 0.8)
20
+
21
+ def test_fine_keywords_override_greeting(self):
22
+ intent, confidence = self.bot.classify_intent("Chào bạn mức phạt vượt đèn đỏ là bao nhiêu", context=None)
23
+ self.assertEqual(intent, "search_fine")
24
+ self.assertGreaterEqual(confidence, 0.8)
25
+
26
+
27
+ if __name__ == "__main__":
28
+ unittest.main()
29
+
backend/hue_portal/chatbot/tests/test_intent_training.py ADDED
@@ -0,0 +1,22 @@
1
+ import json
2
+ from pathlib import Path
3
+ import unittest
4
+
5
+ from hue_portal.chatbot.training import train_intent
6
+
7
+
8
+ class IntentTrainingTestCase(unittest.TestCase):
9
+ def test_train_pipeline_produces_artifacts(self):
10
+ model_path, metrics_path, metrics = train_intent.train(train_intent.DEFAULT_DATASET, test_size=0.3, random_state=123)
11
+
12
+ self.assertTrue(model_path.exists(), "Model artifact should be created")
13
+ self.assertTrue(metrics_path.exists(), "Metrics file should be created")
14
+
15
+ payload = json.loads(metrics_path.read_text(encoding="utf-8"))
16
+ self.assertIn("accuracy", payload)
17
+ self.assertGreaterEqual(payload["accuracy"], 0.0)
18
+ self.assertLessEqual(payload["accuracy"], 1.0)
19
+
20
+
21
+ if __name__ == "__main__":
22
+ unittest.main()
backend/hue_portal/chatbot/tests/test_router.py ADDED
@@ -0,0 +1,41 @@
+ from django.test import SimpleTestCase
+
+ from hue_portal.chatbot.router import IntentRoute, decide_route
+
+
+ class RouterDecisionTests(SimpleTestCase):
+     def test_simple_greeting_routed_to_greeting(self):
+         decision = decide_route("chào bạn", "greeting", 0.9)
+         self.assertEqual(decision.route, IntentRoute.GREETING)
+         self.assertEqual(decision.forced_intent, "greeting")
+
+     def test_doc_code_forces_search_legal(self):
+         decision = decide_route("Cho tôi xem quyết định 69 nói gì", "general_query", 0.4)
+         self.assertEqual(decision.route, IntentRoute.SEARCH)
+         self.assertEqual(decision.forced_intent, "search_legal")
+
+     def test_low_confidence_goes_to_small_talk(self):
+         decision = decide_route("tôi mệt quá", "general_query", 0.2)
+         self.assertEqual(decision.route, IntentRoute.SMALL_TALK)
+         self.assertEqual(decision.forced_intent, "general_query")
+
+     def test_confident_fine_query_stays_search(self):
+         decision = decide_route("mức phạt vượt đèn đỏ là gì", "search_fine", 0.92)
+         self.assertEqual(decision.route, IntentRoute.SEARCH)
+         self.assertIsNone(decision.forced_intent)
+
+     def test_small_talk_routes_to_small_talk(self):
+         decision = decide_route("mệt quá hôm nay", "general_query", 0.4)
+         self.assertEqual(decision.route, IntentRoute.SMALL_TALK)
+         self.assertEqual(decision.forced_intent, "general_query")
+
+     def test_keyword_override_forces_fine_intent(self):
+         decision = decide_route("phạt vượt đèn đỏ sao vậy", "general_query", 0.5)
+         self.assertEqual(decision.route, IntentRoute.SEARCH)
+         self.assertEqual(decision.forced_intent, "search_fine")
+
+     def test_keyword_override_forces_procedure_intent(self):
+         decision = decide_route("thủ tục cư trú cần hồ sơ gì", "general_query", 0.5)
+         self.assertEqual(decision.route, IntentRoute.SEARCH)
+         self.assertEqual(decision.forced_intent, "search_procedure")
+
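Aside (illustration, not the committed code): these cases exercise the public contract of decide_route rather than its internals. A minimal sketch of the interface the assertions assume, using hypothetical names where the real definitions in backend/hue_portal/chatbot/router.py are not shown here:

    from dataclasses import dataclass
    from enum import Enum, auto
    from typing import Optional

    class IntentRoute(Enum):      # members referenced by the tests above
        GREETING = auto()
        SEARCH = auto()
        SMALL_TALK = auto()

    @dataclass
    class RouteDecision:          # hypothetical name for the returned object
        route: IntentRoute
        forced_intent: Optional[str] = None   # None when the classifier's intent is kept

    def decide_route(message: str, intent: str, confidence: float) -> RouteDecision:
        """Map (message, predicted intent, confidence) to a route, possibly overriding the intent."""
        ...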
backend/hue_portal/chatbot/tests/test_smoke.py ADDED
@@ -0,0 +1,29 @@
+ """Smoke tests to ensure chatbot + essential management commands work."""
+
+ from __future__ import annotations
+
+ from django.core.management import call_command, load_command_class
+ from django.test import TestCase
+
+ from hue_portal.chatbot.chatbot import get_chatbot
+
+
+ class ChatbotSmokeTests(TestCase):
+     """Verify chatbot core components can initialize without errors."""
+
+     def test_chatbot_initializes_once(self) -> None:
+         bot = get_chatbot()
+         self.assertIsNotNone(bot)
+         # Intent classifier should be available after initialization/training
+         self.assertIsNotNone(bot.intent_classifier)
+
+
+ class ManagementCommandSmokeTests(TestCase):
+     """Ensure critical management commands are wired correctly."""
+
+     def test_django_check_command(self) -> None:
+         call_command("check")
+
+     def test_retry_ingestion_command_loads(self) -> None:
+         load_command_class("hue_portal.core", "retry_ingestion_job")
+
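Usage note (not part of the diff): the router and smoke suites subclass Django's SimpleTestCase/TestCase, so they are meant to run under Django's test runner with settings configured, typically something like `python manage.py test hue_portal.chatbot.tests` from the directory containing manage.py. The two unittest-based modules (intent keywords and intent training) also expose `__main__` blocks, so they can additionally be run as standalone scripts.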