BaoKhuong committed on
Commit 7da862f · verified · 1 Parent(s): 7df24ba

Upload 2 files

Files changed (2):
  1. app1 - claude-4.1-opus.py +11 -0
  2. app2 - gpt-5-high.py +2 -2
app1 - claude-4.1-opus.py CHANGED
@@ -143,6 +143,17 @@ def _get_llama_instance() -> Llama:
         return _LLAMA_INSTANCE
     if Llama is None:
         raise RuntimeError("llama_cpp is not installed. This app variant requires llama-cpp-python.")
+    # Basic version guard: Fin-o1-14B GGUFs require a recent llama.cpp
+    try:
+        ver = getattr(llama_cpp_mod, "__version__", "0.0.0") or "0.0.0"
+        def _ver_tuple(s: str):
+            parts = [p for p in s.split(".") if p.isdigit()]
+            return tuple(int(p) for p in (parts + ["0", "0"])[:3])
+        if _ver_tuple(ver) < _ver_tuple("0.3.0"):
+            print(f"⚠️ Detected llama-cpp-python {ver}. Recommend >= 0.3.0 for these GGUFs.")
+            print(" Try: pip install -U llama-cpp-python")
+    except Exception:
+        pass
     model_path = _resolve_fin_o1_gguf_path()
 
     def try_make_llama(ctx_size: int) -> Llama | None:
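
Note on the added guard: _ver_tuple only keeps purely numeric dot-separated components, so pre-release suffixes are ignored rather than compared. A minimal standalone sketch of the same check (assuming llama-cpp-python is importable as llama_cpp; only the names shown in the hunk are taken from the source):

import llama_cpp

def _ver_tuple(s: str):
    # Keep numeric parts only: "0.3.2" -> (0, 3, 2); "0.3.2rc1" -> (0, 3, 0)
    parts = [p for p in s.split(".") if p.isdigit()]
    return tuple(int(p) for p in (parts + ["0", "0"])[:3])

ver = getattr(llama_cpp, "__version__", "0.0.0") or "0.0.0"
if _ver_tuple(ver) < _ver_tuple("0.3.0"):
    print(f"llama-cpp-python {ver} detected; 0.3.0+ is recommended for these GGUFs.")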
app2 - gpt-5-high.py CHANGED
@@ -28,9 +28,9 @@ warnings.filterwarnings('ignore', category=FutureWarning)
 
 # Local GGUF model config (CPU-only HF Spaces ~16GB RAM)
 # Default to a non-Qwen LLaMA-arch model to ensure compatibility
-GGUF_REPO = os.getenv("GGUF_REPO", "MaziyarPanahi/Llama-3.1-8B-Instruct-GGUF")
+GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
 # Default to lighter quant to reduce RAM (can override via env)
-GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Llama-3.1-8B-Instruct.Q4_K_S.gguf")
+GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
 N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
 N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
 N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))
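
This hunk only swaps the default GGUF repo and filename; the code that actually downloads and loads the model sits outside the diff. A rough sketch of how such a config is typically consumed on a CPU-only Space (assuming the file is fetched with huggingface_hub's hf_hub_download and loaded with llama_cpp.Llama; everything except the GGUF_*/N_* constants is illustrative, not taken from the app):

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

GGUF_REPO = os.getenv("GGUF_REPO", "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF")
GGUF_FILENAME = os.getenv("GGUF_FILENAME", "Meta-Llama-3.1-8B-Instruct.Q4_K_S.gguf")
N_CTX = int(os.getenv("LLAMA_N_CTX", "2048"))
N_THREADS = int(os.getenv("LLAMA_N_THREADS", str(os.cpu_count() or 4)))
N_BATCH = int(os.getenv("LLAMA_N_BATCH", "128"))

# Download the quantized GGUF once (cached by huggingface_hub), then load it CPU-only.
model_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILENAME)
llm = Llama(
    model_path=model_path,
    n_ctx=N_CTX,          # context window
    n_threads=N_THREADS,  # CPU threads
    n_batch=N_BATCH,      # prompt batch size; smaller values lower peak RAM
    verbose=False,
)

The Q4_K_S quant keeps the 8B model around 5 GB on disk, which is why it is the default for the ~16 GB RAM Space; overriding GGUF_FILENAME via env lets you trade RAM for quality.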