# Sanity check - error without system prompt?
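# Minimal Gradio chat UI for amusktweewt/tiny-model-700M-chat; the default
# system prompt defined below is deliberately left unused for this check.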
import gradio as gr
import torch
from transformers import pipeline
model_name = "amusktweewt/tiny-model-700M-chat"
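# Use the first GPU if available, otherwise fall back to CPU (-1).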
chatbot = pipeline(
    "text-generation",
    model=model_name,
    device=0 if torch.cuda.is_available() else -1,
)
system_prompt_default = (
    "You are a highly intelligent and helpful AI assistant named Tiny Chat. "
    "Always refer to yourself like that. "
    "Your responses should be clear, concise, and accurate. "
    "Always prioritize user needs, provide well-structured answers, "
    "and maintain a friendly yet professional tone. "
    "Adapt to the user's preferences and communication style."
)
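# Set the special tokens the model's chat template is assumed to expect.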
chatbot.tokenizer.bos_token = "<sos>"
chatbot.tokenizer.eos_token = "<|endoftext|>"
def chat_fxn_caller(message, history, temperature=0.6, top_p=0.95, max_tokens=128):
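    """Build a role/content message list from the Gradio history, render it
    with the model's chat template, and return only the newly generated text."""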
    messages = []
    #messages.append({"role": "system", "content": "You are a highly intelligent and helpful AI assistant named Tiny Chat, developed by amusktweewt. Always refer to yourself like that. Your responses should be clear, concise, and accurate. Always prioritize user needs, provide well-structured answers, and maintain a friendly yet professional tone. Adapt to the user's preferences and communication style. When needed, ask clarifying questions to ensure the best response. Be honest about limitations and avoid making assumptions. Keep interactions engaging, informative, and efficient."})
    #history.append({"role": "system", "content": "You're a chatbot"})
    print("history")
    print(history)
    # With type="messages", Gradio passes history as role/content dicts;
    # older tuple-style histories are [user_msg, bot_msg] pairs.
    for turn in history:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": str(turn["content"])})
        elif isinstance(turn, (list, tuple)):
            if len(turn) > 0:
                messages.append({"role": "user", "content": str(turn[0])})
            if len(turn) > 1:
                messages.append({"role": "assistant", "content": str(turn[1])})
    # Append the current user message.
    messages.append({"role": "user", "content": str(message)})
    print("messages")
    print(messages)
    # add_generation_prompt=True closes the message list with the assistant
    # header so the model generates a reply rather than continuing the user turn.
    prompt = chatbot.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Generate a response with sampling.
    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=max_tokens,
        top_k=30,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
    )
print("response:")
print(response)
# Extract only new text
generated = response[0]["generated_text"][len(prompt):].strip()
print("generated")
print(generated)
return generated
    #messages.append({"role": "assistant", "content": generated})
    #return messages
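# type="messages" makes Gradio pass history as a list of role/content dicts,
# which the dict branch in chat_fxn_caller handles.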
demo = gr.ChatInterface(
    chat_fxn_caller,
    type="messages",
)
demo.launch()