import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

st.set_page_config(page_title="OpenChat 3.5 - HF Space", page_icon="🤖")
st.title("🤖 YahyaGPT")


@st.cache_resource
def load_model():
    # Cache the tokenizer and model across Streamlit reruns so they load only once.
    tokenizer = AutoTokenizer.from_pretrained("openchat/openchat-3.5-1210")
    model = AutoModelForCausalLM.from_pretrained(
        "openchat/openchat-3.5-1210",
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return tokenizer, model


tokenizer, model = load_model()

# Streamlit reruns the whole script on every interaction, so the chat
# history lives in session_state.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

prompt = st.chat_input("Ask Yahya anything...")

if prompt:
    st.chat_message("user").write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Flatten the chat history into a single plain-text prompt.
    full_prompt = "You are a helpful assistant.\n"
    for msg in st.session_state.messages:
        if msg["role"] == "user":
            full_prompt += f"User: {msg['content']}\n"
        else:
            full_prompt += f"Assistant: {msg['content']}\n"
    full_prompt += "Assistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded
    # output on "Assistant:" is fragile, since the prompt itself contains
    # that marker on every turn.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    # Drop any hallucinated continuation of a next "User:" turn.
    reply = reply.split("\nUser:")[0].strip()

    st.chat_message("assistant").write(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})
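
# Usage note (a sketch; assumes this file is saved as app.py and a GPU
# with enough VRAM for a 7B model in float16, roughly 15 GB, is available):
#
#   pip install streamlit transformers accelerate torch
#   streamlit run app.py
#
# openchat-3.5 ships its own chat template, so tokenizer.apply_chat_template
# could be used in place of the hand-built "User:/Assistant:" prompt above
# and may improve reply quality; the plain-text format here is a simple
# approximation, not the model's native format.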