mic3333 committed on
Commit
3bb0195
·
verified ·
1 Parent(s): 70f195f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -19
app.py CHANGED
@@ -1,35 +1,74 @@
1
- import gradio as gr
2
- import requests
3
  import os
 
 
 
 
 
4
 
5
- API_URL = "https://api-inference.huggingface.co/models/DISLab/SummLlama3.2-3B"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def summarize(text):
8
- headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
9
- payload = {"inputs": text}
10
-
11
  try:
12
- response = requests.post(API_URL, headers=headers, json=payload)
13
- response.raise_for_status()
14
- result = response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Handle different response formats
17
- if isinstance(result, list) and len(result) > 0:
18
- return result[0].get('summary_text', result[0].get('generated_text', str(result[0])))
19
- elif isinstance(result, dict):
20
- return result.get('summary_text', result.get('generated_text', str(result)))
21
- else:
22
- return str(result)
23
  except Exception as e:
24
  return f"Error: {str(e)}"
25
 
26
  # Create Gradio interface
27
  demo = gr.Interface(
28
  fn=summarize,
29
- inputs=gr.Textbox(lines=10, placeholder="Enter text to summarize..."),
30
- outputs=gr.Textbox(label="Summary"),
 
 
 
 
 
 
 
31
  title="SummLlama3.2-3B Summarization",
32
- description="Test the DISLab/SummLlama3.2-3B model using Hugging Face Inference API"
 
 
 
33
  )
34
 
35
  if __name__ == "__main__":
 
 
 
import os
# NOTE(review): presumably opts this Space out of ZeroGPU scheduling — set
# before other imports so anything reading the flag sees it. TODO confirm.
os.environ["SPACES_ZERO_GPU"] = "false"

import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch

# Load model and tokenizer once at module import so every request reuses them.
model_name = "DISLab/SummLlama3.2-3B"
print(f"Loading model: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Text-generation pipeline: device_map="auto" lets transformers place the
# weights (GPU when available), float16 halves memory vs. default float32.
pipe = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16
)

print("Model loaded successfully!")
22
+
def format_chat_template(document):
    """Wrap *document* in the Alpaca-style prompt recommended by the model
    card, then render it through the tokenizer's chat template.

    Returns the fully formatted prompt string (not tokenized).
    """
    task = "Please summarize the input document."
    user_turn = {
        "role": "user",
        "content": f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{task}\n\n### Input:\n{document}\n\n### Response:\n",
    }
    # Single-turn conversation; no generation prompt is appended because the
    # "### Response:" cue is already part of the content above.
    return tokenizer.apply_chat_template(
        [user_turn], tokenize=False, add_generation_prompt=False
    )
31
 
def summarize(text):
    """Generate a summary of *text* with the SummLlama pipeline.

    Args:
        text: Raw document text entered by the user.

    Returns:
        The generated summary string, or a human-readable "Error: ..."
        message so the Gradio UI never surfaces a raw exception.
    """
    # Guard: don't send empty/whitespace input through the 3B model.
    if not text or not text.strip():
        return "Error: please enter some text to summarize."
    try:
        # Format input with the model-card prompt template.
        formatted_input = format_chat_template(text)

        # Low temperature keeps the summary focused; return_full_text=False
        # strips the prompt so only the completion comes back.
        output = pipe(
            formatted_input,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.3,
            top_p=0.9,
            return_full_text=False
        )

        # Extract the generated summary text.
        return output[0]['generated_text'].strip()
    except Exception as e:
        # Surface any model/runtime failure as text instead of crashing the UI.
        return f"Error: {str(e)}"
54
 
# Create Gradio interface
# Wires summarize() into a simple text-in/text-out web UI, with one
# pre-filled example paragraph users can click to try the model.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Enter text to summarize...",
        label="Input Text"
    ),
    outputs=gr.Textbox(
        label="Summary",
        lines=5
    ),
    title="SummLlama3.2-3B Summarization",
    description="Test the DISLab/SummLlama3.2-3B model - a specialized summarization model trained with DPO",
    examples=[
        ["Artificial intelligence has made remarkable progress in recent years, particularly in natural language processing. Large language models can now understand context, generate human-like text, and perform complex reasoning tasks. These advances have enabled applications ranging from chatbots to code generation tools, transforming how we interact with technology."]
    ]
)
73
 
74
  if __name__ == "__main__":