import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# GGUF checkpoint hosted on the Hugging Face Hub.
REPO_ID = "astegaras/merged_kaggle"
FILENAME = "model-q4_k_m.gguf"

# Fetch the quantized weights once at startup; hf_hub_download caches the
# file locally and returns the cached path.
print("Downloading GGUF model...")
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
print("Model downloaded:", model_path)

# Load the model with llama.cpp. n_ctx sets the context window in tokens;
# n_threads should roughly match the number of available CPU cores.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=8,
)
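
# This configuration runs on CPU. With a GPU build of llama-cpp-python,
# layers can be offloaded via n_gpu_layers (e.g. n_gpu_layers=-1 for all
# layers); that parameter is a suggestion, not part of the original setup.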

def respond(message, history):
    # Rebuild the conversation as a plain-text prompt. This assumes the
    # default pair-style history that gr.ChatInterface provides: a list of
    # (user_message, assistant_message) tuples.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate a completion; the stop strings keep the model from writing
    # the next "User:" turn itself.
    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        stop=["User:", "Assistant:"],
    )

    assistant_reply = output["choices"][0]["text"].strip()
    return assistant_reply
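
# A possible alternative (a sketch, not part of the original script):
# llama-cpp-python also exposes create_chat_completion(), which applies the
# chat template bundled with the GGUF file instead of this hand-rolled
# "User:/Assistant:" prompt format:
#
#     messages = [{"role": "user", "content": message}]
#     out = llm.create_chat_completion(messages=messages, max_tokens=256)
#     reply = out["choices"][0]["message"]["content"]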

# Wire the chat function into a minimal Gradio chat UI.
gr.ChatInterface(
    fn=respond,
    title="My Llama.cpp GGUF Model",
    description="Chat with your fine-tuned GGUF model!",
).launch()
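
# If deployed in a container rather than a Hugging Face Space, binding to all
# interfaces may be needed, e.g. .launch(server_name="0.0.0.0"); this is an
# assumption about the deployment target, not part of the original script.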