MORE PARAMETERS
- app.py +15 -12
- llm_backend.py +1 -4
app.py
CHANGED
@@ -15,18 +15,21 @@ import sys
 llm = LlmBackend()
 _lock = threading.Lock()
 
-SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT'
-CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', '500'))
-HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR'
-USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', '').lower() == 'true'
-ENABLE_GPU = os.environ.get('ENABLE_GPU', '').lower() == 'true'
-GPU_LAYERS = int(os.environ.get('GPU_LAYERS', '0'))
-CHAT_FORMAT = os.environ.get('CHAT_FORMAT'
-REPO_NAME = os.environ.get('REPO_NAME'
-MODEL_NAME = os.environ.get('MODEL_NAME'
-DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL'
-DATA_FILENAME = os.environ.get('DATA_FILENAME'
+SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', default="Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык.")
+CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', default='500'))
+HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR', default='/home/user/app/.cache')
+USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', default='False').lower() == 'true'
+ENABLE_GPU = os.environ.get('ENABLE_GPU', default='False').lower() == 'true'
+GPU_LAYERS = int(os.environ.get('GPU_LAYERS', default='0'))
+CHAT_FORMAT = os.environ.get('CHAT_FORMAT', default='llama-2')
+REPO_NAME = os.environ.get('REPO_NAME', default='IlyaGusev/saiga2_7b_gguf')
+MODEL_NAME = os.environ.get('MODEL_NAME', default='model-q4_K.gguf')
+DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL', default="https://huggingface.co/datasets/muryshev/saiga-chat")
+DATA_FILENAME = os.environ.get('DATA_FILENAME', default="data-saiga-cuda-release.xml")
 HF_TOKEN = os.environ.get("HF_TOKEN")
+APP_HOST = os.environ.get('APP_HOST', default='0.0.0.0')
+APP_PORT = int(os.environ.get('APP_PORT', default='7860'))
+FLASK_THREADED = os.environ.get('FLASK_THREADED', default='False').lower() == "true"
 
 # Create a lock object
 lock = threading.Lock()
@@ -174,5 +177,5 @@ if __name__ == "__main__":
     # scheduler.add_job(check_last_request_time, trigger='interval', minutes=1)
     # scheduler.start()
 
-    app.run(host=
+    app.run(host=APP_HOST, port=APP_PORT, debug=False, threaded=FLASK_THREADED)
 
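A note on the configuration pattern above: os.environ is a collections.abc.Mapping rather than a plain dict, so its get() is the pure-Python Mapping.get(key, default=None) and accepts default as a keyword argument. A minimal sketch of the idiom (APP_HOST and APP_PORT are the names from this commit; the dict line is purely illustrative):

    import os

    # Mapping.get(key, default=None) is defined in pure Python,
    # so the keyword form works on os.environ:
    APP_HOST = os.environ.get('APP_HOST', default='0.0.0.0')
    APP_PORT = int(os.environ.get('APP_PORT', default='7860'))

    # A plain dict's get() is a C method and rejects the keyword:
    # {}.get('APP_HOST', default='0.0.0.0')  # TypeError

Environment values always arrive as strings, which is why the commit casts numbers with int(...) and compares .lower() == 'true' for booleans; each setting can then be overridden per run, e.g. APP_PORT=8080 ENABLE_GPU=true python app.py.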
llm_backend.py
CHANGED
@@ -176,7 +176,4 @@ class LlmBackend:
         except Exception as e:
             log.error('generate_tokens - error')
             log.error(e)
-            yield b'' # End of chunk
-
-
-
+            yield b'' # End of chunk
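The llm_backend.py change restores the yield b'' sentinel in the generator's error path, so a client reading the token stream receives an explicit empty chunk instead of the stream breaking off with no terminator. A minimal sketch of the pattern, assuming a hypothetical llm.generate() iterator (the actual LlmBackend generation API is not shown in this diff):

    import logging

    log = logging.getLogger(__name__)

    def generate_tokens(llm, prompt):
        """Stream tokens as UTF-8 bytes; emit b'' as an end-of-stream sentinel on failure."""
        try:
            for token in llm.generate(prompt):  # hypothetical iterator API
                yield token.encode('utf-8')
        except Exception as e:
            log.error('generate_tokens - error')
            log.error(e)
            yield b''  # End of chunk

A consumer reading the response chunk by chunk can then treat an empty chunk as its termination test (if not chunk: break) rather than waiting for a timeout.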