# app.py — Sentiment Analysis with Copy & Export (CSV/XLSX)
"""Gradio front end for sentence-level sentiment analysis.

The input text is split into sentences, every sentence is classified with a
Hugging Face ``sentiment-analysis`` pipeline, and the result is rendered as
HTML, grouped per sentiment for copying, and exported as CSV/XLSX.
"""
import csv
import json
import logging
import os
import re
import tempfile
from functools import lru_cache
from html import escape as html_escape
from typing import Dict, List, Tuple

import gradio as gr
from transformers import pipeline

# ===== pandas is optional: it backs the CSV/XLSX export =====
try:
    import pandas as pd
except Exception:  # best effort: a broken install is treated like a missing one
    pd = None

# ===== Logging =====
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ===== Model list: (model id, label shown in the dropdown) =====
MODEL_LIST = [
    ("ZombitX64/MultiSent-E5-Pro", "🏆 MultiSent E5 Pro - แนะนำ (ความแม่นยำสูงสุด)"),
    ("ZombitX64/Thai-sentiment-e5", "🎯 Thai Sentiment E5 - เฉพาะภาษาไทย"),
    ("poom-sci/WangchanBERTa-finetuned-sentiment", "🔥 WangchanBERTa - โมเดลไทยยอดนิยม"),
    ("SandboxBhh/sentiment-thai-text-model", "✨ Sandbox Thai - เร็วและแม่นยำ"),
    ("ZombitX64/MultiSent-E5", "⚡ MultiSent E5 - รวดเร็ว"),
    ("Thaweewat/wangchanberta-hyperopt-sentiment-01", "🧠 WangchanBERTa Hyperopt"),
    ("cardiffnlp/twitter-xlm-roberta-base-sentiment", "🌐 XLM-RoBERTa - หลายภาษา"),
    ("phoner45/wangchan-sentiment-thai-text-model", "📱 Wangchan Mobile"),
    ("ZombitX64/Sentiment-01", "🔬 Sentiment v1"),
    ("ZombitX64/Sentiment-02", "🔬 Sentiment v2"),
    ("ZombitX64/Sentiment-03", "🔬 Sentiment v3"),
    ("ZombitX64/sentiment-103", "🔬 Sentiment 103"),
    ("ZombitX64/sentimentSumdata-v1", "🔬 sentimentSumdata-v1"),
    ("ZombitX64/wangchanberta-att-spm-uncased-sentiment", "wangchanberta-att-spm-uncased-sentiment"),
]


# ===== Cache model loading =====
@lru_cache(maxsize=3)
def get_nlp(model_name: str):
    """Return the sentiment pipeline for *model_name*, keeping up to 3 loaded.

    Raises:
        gr.Error: if the pipeline cannot be created. Failures are not cached
            by ``lru_cache``, so a later call retries the load.
    """
    try:
        return pipeline("sentiment-analysis", model=model_name)
    except Exception as e:
        logger.error("Error loading model %s: %s", model_name, e)
        raise gr.Error(f"ไม่สามารถโหลดโมเดล {model_name} ได้: {str(e)}") from e


# ===== Label mappings =====
# Presentation attributes shared by every model, keyed by normalized sentiment.
_SENTIMENT_STYLES = {
    "question": {"emoji": "🤔", "color": "#60a5fa", "bg": "rgba(96,165,250,.2)", "description": "คำถาม"},
    "negative": {"emoji": "😢", "color": "#f87171", "bg": "rgba(248,113,113,.2)", "description": "เชิงลบ"},
    "neutral": {"emoji": "😐", "color": "#facc15", "bg": "rgba(250,204,21,.2)", "description": "เป็นกลาง"},
    "positive": {"emoji": "😊", "color": "#34d399", "bg": "rgba(52,211,153,.2)", "description": "เชิงบวก"},
}

# Buckets used for counting, copying and the per-sentiment Excel sheets.
_SENTIMENT_KEYS = ("positive", "negative", "neutral", "question", "other")
_SUMMARY_EMOJIS = {"positive": "😊", "negative": "😢", "neutral": "😐", "question": "🤔", "other": "🔍"}

_THREE_CLASS = ("negative", "neutral", "positive")
_LABEL_N = ("LABEL_0", "LABEL_1", "LABEL_2")
_SHORT = ("neg", "neu", "pos")


def _label_mapping(raw_labels, names=_THREE_CLASS) -> Dict[str, Dict]:
    """Map each raw model label to its info dict; ``code`` is the position."""
    return {
        raw: {"code": code, "name": name, **_SENTIMENT_STYLES[name]}
        for code, (raw, name) in enumerate(zip(raw_labels, names))
    }


MODEL_LABEL_MAPPINGS = {
    "ZombitX64/wangchanberta-att-spm-uncased-sentiment": _label_mapping(_LABEL_N),
    # The only four-class model: LABEL_0 is "question".
    "ZombitX64/MultiSent-E5-Pro": _label_mapping(
        ("LABEL_0", "LABEL_1", "LABEL_2", "LABEL_3"),
        ("question", "negative", "neutral", "positive"),
    ),
    "ZombitX64/Thai-sentiment-e5": _label_mapping(_LABEL_N),
    "poom-sci/WangchanBERTa-finetuned-sentiment": _label_mapping(_SHORT),
    "SandboxBhh/sentiment-thai-text-model": _label_mapping(_LABEL_N),
    "ZombitX64/MultiSent-E5": _label_mapping(_LABEL_N),
    "Thaweewat/wangchanberta-hyperopt-sentiment-01": _label_mapping(_SHORT),
    "cardiffnlp/twitter-xlm-roberta-base-sentiment": _label_mapping(("NEGATIVE", "NEUTRAL", "POSITIVE")),
    "phoner45/wangchan-sentiment-thai-text-model": _label_mapping(_LABEL_N),
    "ZombitX64/Sentiment-01": _label_mapping(_LABEL_N),
    "ZombitX64/Sentiment-02": _label_mapping(_LABEL_N),
    "ZombitX64/Sentiment-03": _label_mapping(_LABEL_N),
    "ZombitX64/sentiment-103": _label_mapping(_LABEL_N),
    "ZombitX64/sentimentSumdata-v1": _label_mapping(_LABEL_N),
}


def get_label_info(label: str, model_name: str) -> Dict:
    """Return the display info for a raw *label* emitted by *model_name*.

    An exact match in ``MODEL_LABEL_MAPPINGS`` wins; otherwise a
    case-insensitive match is tried, so a label that differs from the table
    only in case is still recognized. Unknown labels get a generic entry with
    ``code`` -1 and the lower-cased label as ``name``.
    """
    model_mappings = MODEL_LABEL_MAPPINGS.get(model_name, {})
    if label in model_mappings:
        return model_mappings[label]
    folded = label.casefold()
    for raw, info in model_mappings.items():
        if raw.casefold() == folded:
            return info
    return {
        "code": -1,
        "name": label.lower(),
        "emoji": "🔍",
        "color": "#64748b",
        "bg": "rgba(100,116,139,.2)",
        "description": f"ไม่ทราบ ({label})",
    }


# ===== Helpers =====
_SENTENCE_BOUNDARY = re.compile(r'[.!?।\n]+')


def split_sentences(text: str) -> List[str]:
    """Split *text* on ``. ! ? ।`` and newlines; keep pieces longer than 2 chars."""
    stripped = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(text))
    return [piece for piece in stripped if len(piece) > 2]


def create_confidence_bar(score: float) -> str:
    """Render *score* (0..1) as a truncated integer percentage snippet."""
    percentage = int(score * 100)
    return f"""
{percentage}%
"""


def _bucket_name(label_info: Dict) -> str:
    """Return the counting bucket for a label info dict ("other" if unknown)."""
    name = label_info.get("name", "other")
    return name if name in _SENTIMENT_KEYS else "other"


def _classify_sentences(sentences: List[str], model_name: str, nlp) -> List[Dict]:
    """Classify each sentence once and return one record per sentence.

    A successful record has ``sentence``, ``label_info``, ``score``, ``index``
    and ``original_label``; a failed one has ``sentence``, ``error`` and
    ``index``. A failure on one sentence does not abort the others.
    """
    records = []
    for index, sentence in enumerate(sentences, 1):
        try:
            result = nlp(sentence)[0]
            raw_label = result["label"]
            record = {
                "sentence": sentence,
                "label_info": get_label_info(raw_label, model_name),
                "score": float(result["score"]),
                "index": index,
                "original_label": raw_label,
            }
        except Exception as e:
            logger.error("Error analyzing sentence %d: %s", index, e)
            record = {"sentence": sentence, "error": str(e), "index": index}
        records.append(record)
    return records


def _count_sentiments(records: List[Dict]) -> Dict[str, int]:
    """Count successful records per bucket; failed records are not counted."""
    counts = dict.fromkeys(_SENTIMENT_KEYS, 0)
    for record in records:
        if "error" not in record:
            counts[_bucket_name(record["label_info"])] += 1
    return counts


def _render_report(model_name: str, records: List[Dict]) -> str:
    """Build the HTML report (header, one card per sentence, summary).

    Every value that originates from the user, the model or an exception is
    HTML-escaped because the result is displayed through ``gr.HTML``.
    """
    html_parts = [f"""

🧠 ผลการวิเคราะห์ความรู้สึก

โมเดล: {html_escape(model_name.split('/')[-1])}

"""]
    html_parts.append("""
""")

    for r in records:
        if 'error' in r:
            html_parts.append(f"""
เกิดข้อผิดพลาดในการวิเคราะห์ประโยคที่ {r['index']}

{html_escape(r['error'])}

""")
            continue
        li = r['label_info']
        conf = create_confidence_bar(r['score'])
        # Truncate the raw text first so an escape sequence is never cut in half.
        snippet = html_escape(r['sentence'][:150])
        ellipsis = '...' if len(r['sentence']) > 150 else ''
        html_parts.append(f"""
{li['emoji']}
{li['description']} {html_escape(r['original_label'])} ประโยคที่ {r['index']}

"{snippet}{ellipsis}"

ความมั่นใจ:
{conf}
""")

    total_sentences = len(records)
    # Average only over sentences that produced a score: failed sentences have
    # no confidence and would otherwise drag the mean towards zero.
    scores = [r['score'] for r in records if 'error' not in r]
    avg_conf = sum(scores) / len(scores) if scores else 0

    chart_items = []
    for s, c in _count_sentiments(records).items():
        if c > 0:
            pct = (c / total_sentences) * 100
            chart_items.append(f"""
{_SUMMARY_EMOJIS.get(s, '🔍')}
{s}
{c} ประโยค ({pct:.1f}%)
""")
    chart_html = "".join(chart_items)

    html_parts.append(f"""

📊 สรุปผลการวิเคราะห์

{total_sentences}
ประโยคทั้งหมด
{avg_conf*100:.0f}%
ความมั่นใจเฉลี่ย
{chart_html}
""")
    return "".join(html_parts)


def _analyze(text: str, model_name: str) -> Tuple[str, List[Dict]]:
    """Validate, classify and render; return ``(html, records)``.

    ``records`` is empty whenever no sentence was classified (blank input,
    nothing long enough to analyze, or the model failed to load); ``html``
    then carries the corresponding message.
    """
    if not text or not text.strip():
        return """
⚠️ กรุณาใส่ข้อความที่ต้องการวิเคราะห์
""", []

    sentences = split_sentences(text)
    if not sentences:
        return """
⚠️ ไม่พบประโยคที่สามารถวิเคราะห์ได้ กรุณาใส่ข้อความที่ยาวกว่านี้
""", []

    try:
        nlp = get_nlp(model_name)
    except Exception as e:
        return f"""
เกิดข้อผิดพลาดในการโหลดโมเดล: {html_escape(str(e))}
""", []

    records = _classify_sentences(sentences, model_name, nlp)
    return _render_report(model_name, records), records


# ===== Main analyzer (HTML) =====
def analyze_text(text: str, model_name: str) -> str:
    """Analyze *text* with *model_name* and return the HTML report."""
    return _analyze(text, model_name)[0]


# ===== Return the HTML plus a structured JSON payload =====
def analyze_text_with_data(text: str, model_name: str) -> Tuple[str, str]:
    """Return ``(html, results_json)`` from a single classification pass.

    ``results_json`` encodes ``{"model", "items", "summary"}``. Each item has
    ``index``, ``sentence``, ``label``, ``score`` and ``raw_label``; a sentence
    that failed gets label ``"error"`` and score 0.0. When nothing was
    classified, ``items`` is empty and ``summary`` is ``{}``.
    """
    report_html, records = _analyze(text, model_name)
    if not records:
        empty = {"model": model_name, "items": [], "summary": {}}
        return report_html, json.dumps(empty, ensure_ascii=False)

    items = []
    for record in records:
        if "error" in record:
            items.append({
                "index": record["index"],
                "sentence": record["sentence"],
                "label": "error",
                "score": 0.0,
                "raw_label": f"error: {record['error']}",
            })
        else:
            items.append({
                "index": record["index"],
                "sentence": record["sentence"],
                "label": _bucket_name(record["label_info"]),
                "score": record["score"],
                "raw_label": record["original_label"],
            })

    payload = {"model": model_name, "items": items, "summary": _count_sentiments(records)}
    return report_html, json.dumps(payload, ensure_ascii=False)


# ===== Combined text per sentiment, for the Copy tab =====
def build_copy_texts(results_json: str) -> Tuple[str, str, str, str, str]:
    """Group the sentences in *results_json* by sentiment.

    Returns five newline-joined strings in the order positive, negative,
    neutral, question, other. Items with an unrecognized label (including
    ``"error"``) go to "other". Unparseable input yields five empty strings.
    """
    try:
        data = json.loads(results_json)
    except (TypeError, ValueError):
        return "", "", "", "", ""
    if not isinstance(data, dict):
        return "", "", "", "", ""

    buckets = {key: [] for key in _SENTIMENT_KEYS}
    for it in data.get("items", []):
        lb = it.get("label", "other")
        if lb not in buckets:
            lb = "other"
        buckets[lb].append(f"{it.get('index','')}. {it.get('sentence','')}")
    return tuple("\n".join(buckets[key]) for key in _SENTIMENT_KEYS)


# ===== Export CSV/XLSX =====
_EXPORT_COLUMNS = ["index", "sentence", "label", "score", "raw_label"]


def _load_export_items(results_json: str) -> List[Dict]:
    """Decode *results_json* and return its items, or raise a readable error."""
    try:
        data = json.loads(results_json)
    except (TypeError, ValueError):
        data = None
    if not isinstance(data, dict):
        # Happens when nothing has been analyzed yet or the results were cleared.
        raise gr.Error("ยังไม่มีผลลัพธ์ให้ส่งออก กรุณาวิเคราะห์ข้อความก่อน")
    return data.get("items", [])


def _new_export_path(filename: str) -> str:
    """Return *filename* inside a fresh temp directory.

    A directory per export keeps the download name stable while preventing
    concurrent requests from overwriting each other's file. The directories
    are not removed here.
    """
    return os.path.join(tempfile.mkdtemp(prefix="sentiment_export_"), filename)


def export_csv(results_json: str) -> str:
    """Write the items in *results_json* to a CSV file and return its path.

    Uses pandas when available and the stdlib ``csv`` module otherwise.

    Raises:
        gr.Error: if *results_json* holds no analysis results.
    """
    items = _load_export_items(results_json)
    path = _new_export_path("sentiment_results.csv")
    if pd is None:
        with open(path, "w", encoding="utf-8", newline="") as f:
            w = csv.writer(f)
            w.writerow(_EXPORT_COLUMNS)
            w.writerows([it.get(col, "") for col in _EXPORT_COLUMNS] for it in items)
        return path
    pd.DataFrame(items, columns=_EXPORT_COLUMNS).to_csv(path, index=False)
    return path


def export_xlsx(results_json: str) -> str:
    """Write the items to an .xlsx workbook and return its path.

    The workbook has an "all" sheet plus one sheet per sentiment that has at
    least one row.

    Raises:
        gr.Error: if pandas or openpyxl is missing, or if *results_json* holds
            no analysis results.
    """
    if pd is None:
        raise gr.Error("ต้องติดตั้ง pandas/openpyxl ก่อนจึงจะส่งออก .xlsx ได้")
    items = _load_export_items(results_json)
    df = pd.DataFrame(items, columns=_EXPORT_COLUMNS)
    path = _new_export_path("sentiment_results.xlsx")
    try:
        with pd.ExcelWriter(path, engine="openpyxl") as writer:
            df.to_excel(writer, index=False, sheet_name="all")
            for s in _SENTIMENT_KEYS:
                sdf = df[df["label"] == s]
                if not sdf.empty:
                    sdf.to_excel(writer, index=False, sheet_name=s)
    except ImportError as e:
        # pandas is present but the openpyxl engine is not installed.
        raise gr.Error("ต้องติดตั้ง pandas/openpyxl ก่อนจึงจะส่งออก .xlsx ได้") from e
    return path


# ===== CSS (abbreviated for brevity) =====
CUSTOM_CSS = """
* { font-family: 'Inter','Noto Sans Thai',sans-serif !important; }
body, .gradio-container { background: linear-gradient(135deg,#181f2a 0%,#232e3c 100%) !important; }
.main-uxui-card { background:#232e3c;border-radius:20px;border:1.5px solid #2d3a4d;padding:24px;color:#e3e8ef; }
.main-uxui-btn { padding:.9em 2em;border-radius:12px;font-weight:600;background:linear-gradient(90deg,#2563eb 0%,#1e293b 100%);color:#f8fafc;border:none; }
.main-uxui-input, .main-uxui-dropdown { border:1.5px solid #2d3a4d;background:#1e2533;color:#e3e8ef;padding:14px;border-radius:10px; }
.main-uxui-output { background:#1e2533;border:1.5px solid #2d3a4d;border-radius:14px;padding:18px; }
"""

# ===== UI =====
# NOTE(review): the component nesting below was reconstructed from a
# whitespace-mangled source — confirm it matches the intended layout.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Base(), title="Sentiment Analysis") as demo:
    with gr.Column(elem_classes="main-uxui-card"):
        gr.HTML("""

Sentiment Analysis

วิเคราะห์ความรู้สึกหลายภาษา + Export ไฟล์

""")
        with gr.Row():
            model_dropdown = gr.Dropdown(
                choices=[(desc, name) for name, desc in MODEL_LIST],  # (label, value)
                value=MODEL_LIST[0][0],
                label="เลือกโมเดล (Model)",
                elem_classes="main-uxui-dropdown",
            )
        with gr.Row():
            input_box = gr.Textbox(
                lines=5,
                placeholder="พิมพ์ข้อความ (รองรับหลายประโยค แยกด้วย ., ?, ! หรือขึ้นบรรทัดใหม่)",
                label="ข้อความที่ต้องการวิเคราะห์",
                elem_classes="main-uxui-input",
            )
        with gr.Row():
            analyze_btn = gr.Button("วิเคราะห์", elem_classes="main-uxui-btn")
            clear_btn = gr.Button("ล้างผลลัพธ์", elem_classes="main-uxui-btn")

        with gr.Tab("ผลลัพธ์"):
            output_html = gr.HTML(label="ผลลัพธ์", elem_classes="main-uxui-output")

        with gr.Tab("Copy ตาม Sentiment"):
            gr.Markdown("**คัดลอกข้อความที่จัดกลุ่มแล้วตาม sentiment**")
            pos_copy = gr.Textbox(label="😊 Positive", lines=8, show_copy_button=True)
            neg_copy = gr.Textbox(label="😢 Negative", lines=8, show_copy_button=True)
            neu_copy = gr.Textbox(label="😐 Neutral", lines=8, show_copy_button=True)
            q_copy = gr.Textbox(label="🤔 Question", lines=6, show_copy_button=True)
            other_copy = gr.Textbox(label="🔍 Other/Unknown", lines=6, show_copy_button=True)

        with gr.Tab("Export"):
            # Hidden carrier for the JSON payload consumed by the export buttons.
            results_json = gr.Textbox(visible=False)
            with gr.Row():
                export_csv_btn = gr.Button("⬇️ Export CSV", elem_classes="main-uxui-btn")
                export_xlsx_btn = gr.Button("⬇️ Export Excel (.xlsx)", elem_classes="main-uxui-btn")
            export_file = gr.File(label="ดาวน์โหลดไฟล์ที่นี่", interactive=False)

        gr.Examples(
            examples=[
                ["วันนี้อากาศดีมากๆ รู้สึกสดชื่นและมีความสุขมาก!"],
                ["เศร้ามากเลยวันนี้ งานเยอะเกินไป"],
                ["อาหารอร่อยดี แต่บริการช้ามาก"],
                ["คุณคิดอย่างไรกับเศรษฐกิจไทย?"],
                ["I love this product! It's amazing."],
                ["이 제품은 별로예요. 다시는 안 살 거예요."],
                ["This is the worst experience I've ever had."],
            ],
            inputs=input_box,
            label="ตัวอย่างข้อความ",
        )

    # ===== Callbacks =====
    def on_analyze(text, model):
        """Run the analysis and fan the result out to every output component."""
        report_html, rjson = analyze_text_with_data(text, model)
        pos, neg, neu, qn, other = build_copy_texts(rjson)
        return report_html, rjson, pos, neg, neu, qn, other

    analysis_inputs = [input_box, model_dropdown]
    analysis_outputs = [output_html, results_json, pos_copy, neg_copy, neu_copy, q_copy, other_copy]

    analyze_btn.click(on_analyze, analysis_inputs, analysis_outputs)
    input_box.submit(on_analyze, analysis_inputs, analysis_outputs)
    model_dropdown.change(on_analyze, analysis_inputs, analysis_outputs)

    clear_btn.click(lambda: ("", "", "", "", "", "", ""), None, analysis_outputs)

    export_csv_btn.click(export_csv, inputs=results_json, outputs=export_file)
    export_xlsx_btn.click(export_xlsx, inputs=results_json, outputs=export_file)

# ===== Launch =====
if __name__ == "__main__":
    # NOTE(review): share=True, server_name="0.0.0.0" and ssl_verify=False are
    # kept from the original configuration — confirm they are intended for
    # the deployment target.
    demo.queue(max_size=50, default_concurrency_limit=10).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        show_api=False,
        quiet=False,
        ssl_verify=False,
        app_kwargs={"docs_url": None, "redoc_url": None},
    )