import gradio as gr
from transformers import pipeline
# Choose a model that fits the Hugging Face Spaces free tier.
# 'microsoft/Phi-3-mini-4k-instruct' (used below) gives strong answers but is
# ~3.8B parameters; lighter options such as 'google/gemma-3-270m' or
# 'Qwen/Qwen3-0.6B' load and respond faster on CPU-only hardware.
model_name = "microsoft/Phi-3-mini-4k-instruct"
pipe = pipeline("text-generation", model=model_name)
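
# Optional: on a GPU Space, half precision and automatic device placement
# speed up loading and generation. A minimal sketch, assuming torch is
# installed alongside transformers; the plain call above is fine on the
# CPU-only free tier.
# import torch
# pipe = pipeline(
#     "text-generation",
#     model=model_name,
#     torch_dtype=torch.bfloat16,  # half precision roughly halves memory use
#     device_map="auto",           # place weights on available accelerators
# )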
def chatbot(message, history):
    """
    Generate a response from the model, folding the conversation
    history into the prompt.
    """
    # With the default settings, gradio.ChatInterface passes history as a
    # list of [user_message, bot_response] pairs:
    # [[user_message_1, bot_response_1], [user_message_2, bot_response_2], ...]
    # Combine the conversation history and the current message into one prompt.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nBot: {bot_msg}\n"
    prompt += f"User: {message}\nBot: "
    # Generate the response with the text-generation pipeline.
    # max_new_tokens bounds the response length; the sampling parameters
    # trade a little determinism for more natural, less repetitive output.
    response = pipe(
        prompt,
        max_new_tokens=150,
        return_full_text=False,   # return only the newly generated text
        do_sample=True,           # sample instead of greedy decoding
        temperature=0.7,          # soften the token distribution
        top_k=50,                 # restrict sampling to the 50 likeliest tokens
        top_p=0.95,               # nucleus sampling cutoff
        repetition_penalty=1.2    # penalize repeated phrases
    )
    # Extract the generated text and trim stray whitespace. With a raw
    # transcript prompt the model may keep writing the user's next turn,
    # so cut the reply off at the first "User:" it tries to generate.
    generated_text = response[0]['generated_text'].strip()
    generated_text = generated_text.split("User:")[0].strip()
    return generated_text
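
# Quick local sanity check; the question is just a hypothetical example.
# print(chatbot("What is the capital of France?", []))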
# Create the Gradio chat interface.
# gr.ChatInterface wires up the chat UI: it tracks the conversation history
# and provides the message box and submit button automatically.
demo = gr.ChatInterface(
    fn=chatbot,
    title="Chatbot powered by JvAI",
    description="Ask me anything!"
)
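
# Newer Gradio releases prefer the OpenAI-style "messages" history over the
# list-of-pairs format. A sketch, assuming a Gradio version with the `type`
# parameter; chatbot() would then receive history as dicts with "role" and
# "content" keys instead of [user, bot] pairs:
# demo = gr.ChatInterface(fn=chatbot, type="messages", title="Chatbot powered by JvAI")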
# Launch the Gradio app.
if __name__ == "__main__":
    demo.launch()