Maaroufabousaleh
committed on
Commit
·
010dba3
1
Parent(s):
58c5ce0
Refactor Gradio entrypoint script to improve DATA_DIR determination and enhance NLTK download error handling
Browse files
- deployment/gradio_entrypoint.sh +40 -26
deployment/gradio_entrypoint.sh
CHANGED
|
@@ -3,48 +3,62 @@ set -u
|
|
| 3 |
|
| 4 |
echo "Starting AdvisorAI Data Pipeline with Gradio..."
|
| 5 |
|
| 6 |
-
# Determine writable
|
| 7 |
-
NLTK_DIR
|
| 8 |
import os
|
| 9 |
try:
|
| 10 |
-
|
| 11 |
except Exception:
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
|
| 25 |
-
nl = os.path.join(
|
| 26 |
os.makedirs(nl, exist_ok=True)
|
| 27 |
-
print(nl)
|
| 28 |
PY
|
| 29 |
-
)
|
| 30 |
|
|
|
|
| 31 |
export NLTK_DATA="$NLTK_DIR"
|
|
|
|
| 32 |
echo "NLTK_DATA set to: $NLTK_DATA"
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Best-effort NLTK downloads (do not fail on errors)
|
| 35 |
python - <<'PY'
|
| 36 |
import os
|
| 37 |
print('Preparing NLTK into', os.environ.get('NLTK_DATA'))
|
| 38 |
try:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
except Exception as e:
|
| 47 |
-
|
| 48 |
PY
|
| 49 |
|
| 50 |
echo "Starting services..."
|
|
|
|
| 3 |
|
| 4 |
echo "Starting AdvisorAI Data Pipeline with Gradio..."
|
| 5 |
|
| 6 |
+
# Determine a writable DATA_DIR using the app's own logic, export it, and prepare NLTK dir
|
| 7 |
+
read -r DATA_DIR NLTK_DIR <<< "$(python - <<'PY'
|
| 8 |
import os
|
| 9 |
try:
|
| 10 |
+
from src.config import DATA_DIR as BASE
|
| 11 |
except Exception:
|
| 12 |
+
# fallback order
|
| 13 |
+
for p in ['/data', '/app/data', '/tmp']:
|
| 14 |
+
try:
|
| 15 |
+
os.makedirs(p, exist_ok=True)
|
| 16 |
+
test = os.path.join(p, '.wtest')
|
| 17 |
+
open(test,'w').close(); os.remove(test)
|
| 18 |
+
BASE = p
|
| 19 |
+
break
|
| 20 |
+
except Exception:
|
| 21 |
+
continue
|
| 22 |
+
else:
|
| 23 |
+
BASE = '/tmp'
|
| 24 |
|
| 25 |
+
nl = os.path.join(BASE, 'nltk_data')
|
| 26 |
os.makedirs(nl, exist_ok=True)
|
| 27 |
+
print(BASE, nl)
|
| 28 |
PY
|
| 29 |
+
)"
|
| 30 |
|
| 31 |
+
export DATA_DIR
|
| 32 |
export NLTK_DATA="$NLTK_DIR"
|
| 33 |
+
echo "DATA_DIR set to: $DATA_DIR"
|
| 34 |
echo "NLTK_DATA set to: $NLTK_DATA"
|
| 35 |
|
| 36 |
+
# Optionally fetch historical data from Filebase/S3 before starting services
|
| 37 |
+
if [ "${SKIP_FILEBASE_FETCH:-0}" != "1" ]; then
|
| 38 |
+
if [ -n "${FILEBASE_BUCKET:-}" ] && [ -n "${FILEBASE_ACCESS_KEY:-}" ] && [ -n "${FILEBASE_SECRET_KEY:-}" ]; then
|
| 39 |
+
echo "Fetching historical data from Filebase into $DATA_DIR ..."
|
| 40 |
+
python /app/deployment/fetch_filebase.py --base-dir "$DATA_DIR" || echo "[WARN] Filebase fetch failed (continuing startup)."
|
| 41 |
+
else
|
| 42 |
+
echo "[INFO] Skipping Filebase fetch: credentials or bucket not configured."
|
| 43 |
+
fi
|
| 44 |
+
else
|
| 45 |
+
echo "[INFO] SKIP_FILEBASE_FETCH=1 -> skipping Filebase fetch."
|
| 46 |
+
fi
|
| 47 |
+
|
| 48 |
# Best-effort NLTK downloads (do not fail on errors)
|
| 49 |
python - <<'PY'
|
| 50 |
import os
|
| 51 |
print('Preparing NLTK into', os.environ.get('NLTK_DATA'))
|
| 52 |
try:
|
| 53 |
+
import nltk
|
| 54 |
+
for pkg in ['punkt', 'stopwords', 'vader_lexicon']:
|
| 55 |
+
try:
|
| 56 |
+
nltk.download(pkg, download_dir=os.environ.get('NLTK_DATA'), quiet=True)
|
| 57 |
+
print('Downloaded', pkg)
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print('NLTK download failed for', pkg, e)
|
| 60 |
except Exception as e:
|
| 61 |
+
print('NLTK import failed:', e)
|
| 62 |
PY
|
| 63 |
|
| 64 |
echo "Starting services..."
|