File size: 2,863 Bytes
2207b59
4ecd9e6
dbf3c9a
4ecd9e6
dbf3c9a
2207b59
dbf3c9a
 
 
 
 
 
2207b59
2ffa472
 
2207b59
 
 
 
 
4ecd9e6
771f748
 
 
2ffa472
5ebc9ea
bff1060
4b234e7
12390aa
a352354
60a5170
444d328
 
 
50dcc80
 
 
 
 
 
 
 
bff1060
 
 
 
444d328
 
 
bff1060
dbf3c9a
8cf437b
dbf3c9a
 
 
2207b59
6f5eb81
2207b59
8cf437b
dbf3c9a
 
2207b59
4ecd9e6
5ebc9ea
444d328
 
 
 
8cf437b
2207b59
8cf437b
444d328
 
 
c429885
 
 
9013609
5ebc9ea
20ed78d
5ebc9ea
2ffa472
20ed78d
d564e7e
462f015
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import torch
from transformers import pipeline

# HuggingFace model id for the small chat model served by this app.
model_name = "amusktweewt/tiny-model-700M-chat"

# Build the text-generation pipeline once at import time.
# device=0 selects the first CUDA GPU when available, -1 falls back to CPU.
chatbot = pipeline(
    "text-generation",
    model=model_name,
    device=0 if torch.cuda.is_available() else -1
)

# NOTE(review): this default system prompt is defined but never used in the
# visible code — the only reference is a commented-out line inside
# chat_fxn_caller. Confirm whether it should be injected into `messages`.
system_prompt_default = (
    "You are a highly intelligent and helpful AI assistant named Tiny Chat. "
    "Always refer to yourself like that. "
    "Your responses should be clear, concise, and accurate. "
    "Always prioritize user needs, provide well-structured answers, "
    "and maintain a friendly yet professional tone. "
    "Adapt to the user's preferences and communication style."
)

# Override the tokenizer's special tokens to match this model's training
# vocabulary (presumably what the chat template expects — verify against the
# model card).
chatbot.tokenizer.bos_token = "<sos>"
chatbot.tokenizer.eos_token = "<|endoftext|>"

def chat_fxn_caller(message, history, temperature=0.6, top_p=0.95, max_tokens=128):

    messages = []

    #messages.append({"role": "system", "content": "You are a highly intelligent and helpful AI assistant named Tiny Chat, developed by amusktweewt. Always refer to yourself like that. Your responses should be clear, concise, and accurate. Always prioritize user needs, provide well-structured answers, and maintain a friendly yet professional tone. Adapt to the user's preferences and communication style. When needed, ask clarifying questions to ensure the best response. Be honest about limitations and avoid making assumptions. Keep interactions engaging, informative, and efficient."})
    #history.append({"role": "system", "content": "You're a chatbot"})

    print("history")
    print(history)

    if history != []:
        for turn in history:
            # Each turn is [user_msg, bot_msg] or [user_msg] depending on history length
            if isinstance(turn, list):
                if len(turn) > 0:
                    messages.append({"role": "user", "content": str(turn[0])})
                if len(turn) > 1:
                    messages.append({"role": "assistant", "content": str(turn[1])})

    # Append current user message
    messages.append({"role": "user", "content": str(message)})

    print("messages")
    print(messages)

    prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)

    # Generate response
    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=max_tokens,
        top_k=30,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id
    )

    print("response:")
    print(response)
    

    # Extract only new text
    generated = response[0]["generated_text"][len(prompt):].strip()

    print("generated")
    print(generated)
    
    return generated
    #messages.append({"role": "assistant", "content": generated})
    #return messages


# Build the chat UI. type="messages" makes Gradio pass `history` to the
# callback as a list of {"role", "content"} dicts (OpenAI-style), not as
# [user, bot] pairs.
demo = gr.ChatInterface(
    chat_fxn_caller,
    type="messages"
)

# Start the Gradio web server (blocking call).
demo.launch()