Maaroufabousaleh committed on
Commit
010dba3
·
1 Parent(s): 58c5ce0

Refactor Gradio entrypoint script to improve DATA_DIR determination and enhance NLTK download error handling

Browse files
Files changed (1) hide show
  1. deployment/gradio_entrypoint.sh +40 -26
deployment/gradio_entrypoint.sh CHANGED
@@ -3,48 +3,62 @@ set -u
3
 
4
  echo "Starting AdvisorAI Data Pipeline with Gradio..."
5
 
6
- # Determine writable data dir via existing Python config logic
7
- NLTK_DIR=$(python - <<'PY'
8
  import os
9
  try:
10
- from src.config import DATA_DIR
11
  except Exception:
12
- # fallback order
13
- for p in ['/data', '/app/data', '/tmp']:
14
- try:
15
- os.makedirs(p, exist_ok=True)
16
- test = os.path.join(p, '.wtest')
17
- open(test,'w').close(); os.remove(test)
18
- DATA_DIR = p
19
- break
20
- except Exception:
21
- continue
22
- else:
23
- DATA_DIR = '/tmp'
24
 
25
- nl = os.path.join(DATA_DIR, 'nltk_data')
26
  os.makedirs(nl, exist_ok=True)
27
- print(nl)
28
  PY
29
- )
30
 
 
31
  export NLTK_DATA="$NLTK_DIR"
 
32
  echo "NLTK_DATA set to: $NLTK_DATA"
33
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Best-effort NLTK downloads (do not fail on errors)
35
  python - <<'PY'
36
  import os
37
  print('Preparing NLTK into', os.environ.get('NLTK_DATA'))
38
  try:
39
- import nltk
40
- for pkg in ['punkt', 'stopwords', 'vader_lexicon']:
41
- try:
42
- nltk.download(pkg, download_dir=os.environ.get('NLTK_DATA'), quiet=True)
43
- print('Downloaded', pkg)
44
- except Exception as e:
45
- print('NLTK download failed for', pkg, e)
46
  except Exception as e:
47
- print('NLTK import failed:', e)
48
  PY
49
 
50
  echo "Starting services..."
 
3
 
4
  echo "Starting AdvisorAI Data Pipeline with Gradio..."
5
 
6
+ # Determine a writable DATA_DIR using the app's own logic, export it, and prepare NLTK dir
7
+ read -r DATA_DIR NLTK_DIR <<< "$(python - <<'PY'
8
  import os
9
  try:
10
+ from src.config import DATA_DIR as BASE
11
  except Exception:
12
+ # fallback order
13
+ for p in ['/data', '/app/data', '/tmp']:
14
+ try:
15
+ os.makedirs(p, exist_ok=True)
16
+ test = os.path.join(p, '.wtest')
17
+ open(test,'w').close(); os.remove(test)
18
+ BASE = p
19
+ break
20
+ except Exception:
21
+ continue
22
+ else:
23
+ BASE = '/tmp'
24
 
25
+ nl = os.path.join(BASE, 'nltk_data')
26
  os.makedirs(nl, exist_ok=True)
27
+ print(BASE, nl)
28
  PY
29
+ )"
30
 
31
+ export DATA_DIR
32
  export NLTK_DATA="$NLTK_DIR"
33
+ echo "DATA_DIR set to: $DATA_DIR"
34
  echo "NLTK_DATA set to: $NLTK_DATA"
35
 
36
+ # Optionally fetch historical data from Filebase/S3 before starting services
37
+ if [ "${SKIP_FILEBASE_FETCH:-0}" != "1" ]; then
38
+ if [ -n "${FILEBASE_BUCKET:-}" ] && [ -n "${FILEBASE_ACCESS_KEY:-}" ] && [ -n "${FILEBASE_SECRET_KEY:-}" ]; then
39
+ echo "Fetching historical data from Filebase into $DATA_DIR ..."
40
+ python /app/deployment/fetch_filebase.py --base-dir "$DATA_DIR" || echo "[WARN] Filebase fetch failed (continuing startup)."
41
+ else
42
+ echo "[INFO] Skipping Filebase fetch: credentials or bucket not configured."
43
+ fi
44
+ else
45
+ echo "[INFO] SKIP_FILEBASE_FETCH=1 -> skipping Filebase fetch."
46
+ fi
47
+
48
  # Best-effort NLTK downloads (do not fail on errors)
49
  python - <<'PY'
50
  import os
51
  print('Preparing NLTK into', os.environ.get('NLTK_DATA'))
52
  try:
53
+ import nltk
54
+ for pkg in ['punkt', 'stopwords', 'vader_lexicon']:
55
+ try:
56
+ nltk.download(pkg, download_dir=os.environ.get('NLTK_DATA'), quiet=True)
57
+ print('Downloaded', pkg)
58
+ except Exception as e:
59
+ print('NLTK download failed for', pkg, e)
60
  except Exception as e:
61
+ print('NLTK import failed:', e)
62
  PY
63
 
64
  echo "Starting services..."