File size: 1,132 Bytes
6677176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env bash
#
# Container entrypoint: regenerates the answer-embeddings file when it is
# missing (or when forced) and then starts the uvicorn server.
#
# Environment variables (all optional, defaults below):
#   CSV_PATH        dataset CSV path, exported to regenerate_embeddings.py
#   OUT_EMBED_PATH  embeddings file path; its absence triggers regeneration
#   HF_REPO         repository id, exported to regenerate_embeddings.py
#   MODEL_DIR       model location; falls back to HF_REPO when not set
#   FORCE_REGEN     "true" or "1" forces regeneration even if the file exists
#   UPLOAD_BACK     exported to regenerate_embeddings.py
#                   (NOTE(review): presumably toggles an upload step - confirm)
#   PORT            port uvicorn listens on (default 7860)

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

CSV_PATH=${CSV_PATH:-/app/export_artifacts/muril_multilingual_dataset.csv}
EMBED_PATH=${OUT_EMBED_PATH:-/app/export_artifacts/answer_embeddings.pt}
# HF_REPO must receive its default *before* MODEL_DIR falls back to it;
# otherwise MODEL_DIR would expand to an empty string when neither is set.
HF_REPO=${HF_REPO:-Sp2503/Finetuned-multilingualdataset-MuriL-model}
MODEL_DIR=${MODEL_DIR:-$HF_REPO}
FORCE_REGEN=${FORCE_REGEN:-false}
UPLOAD_BACK=${UPLOAD_BACK:-false}

# Log the effective configuration so the run's settings are visible in the
# container output.
printf '%s\n' "Entrypoint: CSV_PATH=$CSV_PATH EMBED_PATH=$EMBED_PATH MODEL_DIR=$MODEL_DIR FORCE_REGEN=$FORCE_REGEN"

# Ensure the directory that will contain the embeddings file exists.
embed_dir=$(dirname "$EMBED_PATH")
mkdir -p "$embed_dir"

# Decide whether the embeddings have to be rebuilt. need_regen ends up "true"
# when the embeddings file is absent or when FORCE_REGEN is "true" / "1".
need_regen=false

# Trigger 1: no regular file at EMBED_PATH.
if ! [ -f "$EMBED_PATH" ]; then
  echo "Embeddings not found -> will regenerate."
  need_regen=true
fi

# Trigger 2: explicit request through FORCE_REGEN.
case "$FORCE_REGEN" in
  true|1)
    echo "FORCE_REGEN -> will regenerate embeddings."
    need_regen=true
    ;;
esac

# Run the regeneration script only when it was requested above; otherwise
# just report that the step is skipped.
if [ "$need_regen" != "true" ]; then
  echo "Skipping regeneration."
else
  # Hand the resolved settings to the Python script through its environment.
  # EMBED_PATH is passed under the name OUT_EMBED_PATH.
  export \
    MODEL_DIR="${MODEL_DIR}" \
    CSV_PATH="${CSV_PATH}" \
    OUT_EMBED_PATH="${EMBED_PATH}" \
    HF_REPO="${HF_REPO}" \
    UPLOAD_BACK="${UPLOAD_BACK}"
  python /app/regenerate_embeddings.py
  echo "Regeneration finished."
fi

# Replace this shell process with the uvicorn server. PORT falls back to 7860
# and is quoted so its value is never word-split or glob-expanded.
exec uvicorn main:app --host 0.0.0.0 --port "${PORT:-7860}"