Spaces:
Running
Running
# ===========================================================================
# remote_sync.sh — Sync UniDriveVLA NVFP4 resources to the quantization server
# ===========================================================================
# Uploads (via scp) everything needed to run NVFP4 quantization on the remote
# server, which has NO internet access to huggingface.co (but pip works via the
# Tencent cloud mirror):
#   1. HF model cache   — owl10/UniDriveVLA_Nusc_Base_Stage1 (bf16 weights, ~4 GB)
#   2. HF dataset cache — OpenDriveLab/DriveLM (v1_1_train_nus.json + image zips, ~4.2 GB)
#   3. Code             — unidrive_vla_nusc_base_quantize_all.py + evaluation.py
#   4. Env scripts      — prepare_env.sh (run on the server after sync)
#
# nuScenes images: transferred as the DriveLM image zips
# (drivelm_nus_imgs_{train,val}.zip, ~4.2 GB total). They are downloaded to the
# local HF cache on first run (if missing) and uploaded as dataset blobs; the
# server unzips them into nuscenes/samples/CAM_*/ so load_drivelm_dataset
# resolves the multi-view image paths referenced by the JSON. Without these,
# calibration/evaluation degrade to text-only input; with them, NVFP4 AWQ
# calibration sees real multi-view activations.
#
# Idempotent: each scp is guarded by a remote existence check, so re-running the
# script after a partial/interrupted transfer only uploads what is missing. A
# single SSH ControlMaster connection is opened so the password is entered once
# even though we run many ssh/scp commands.
#
# Connection (interactive password — NOT written into this script):
#   ssh -p 10914 root@43.180.252.174
# NOTE(review): the "Connection config" section below sets
# REMOTE_HOST=10.240.97.245 and REMOTE_PORT=22, and those are the values the
# script actually uses. Confirm which endpoint is current and keep this example
# in sync with the config.
# Server project dir: /mnt/vepfs/share/GW00387266/MODULES_PLAY
#
# Usage:
#   bash remote_sync.sh                 # upload everything missing, then prompt to run prepare_env.sh
#   SKIP_PREPARE=1 bash remote_sync.sh  # upload only, do not run prepare_env.sh
# ===========================================================================
set -euo pipefail

# ---------------------------------------------------------------------------
# Connection config
# ---------------------------------------------------------------------------
REMOTE_HOST="10.240.97.245"
REMOTE_PORT="22"
REMOTE_USER="root"
REMOTE_PROJECT="/mnt/vepfs/share/GW00387266/MODULES_PLAY"

# Remote HF cache root. HF libs read $HF_HOME/hub (default ~/.cache/huggingface).
# We place the caches under the project dir (which has 1.1 PB free on /mnt/vepfs)
# rather than /root (200 GB), then point HF_HOME there at runtime.
REMOTE_HF_HOME="${REMOTE_PROJECT}/hf_home"
REMOTE_HF_HUB="${REMOTE_HF_HOME}/hub"

# nuScenes images (unzipped from the DriveLM image zips) live here on the server.
# load_drivelm_dataset resolves JSON image_paths like "../nuscenes/samples/CAM_FRONT/..."
# against img_base; passing --nuscenes_dir points img_base here so the paths resolve.
REMOTE_NUSCENES="${REMOTE_PROJECT}/nuscenes"

# DriveLM image zips to download (from the OpenDriveLab/DriveLM dataset repo) and
# upload. train ~3.48 GB, val ~0.70 GB. Downloaded to the local HF cache first if
# missing (so the script works on a fresh machine with internet), then synced as
# dataset blobs and unzipped on the server into REMOTE_NUSCENES.
DRIVELM_DATASET_REPO="OpenDriveLab/DriveLM"
DRIVELM_IMG_ZIPS=("drivelm_nus_imgs_train.zip" "drivelm_nus_imgs_val.zip")

# A single SSH control socket so the password is entered once across all commands.
# The socket lives in a private directory created atomically by `mktemp -d`
# (mode 0700). The previous `mktemp -u` + `mkdir -p` only *reserved a name*, which
# is racy and creates the directory with umask-dependent permissions.
SSH_SOCK_DIR="$(mktemp -d)"
SSH_SOCK="${SSH_SOCK_DIR}/ssh_mux_unidrive"

# Common ssh/scp flags: share one multiplexed connection (kept alive 300 s after
# the last client exits), accept the host key on first contact only, and send
# keep-alives so long transfers do not drop the master.
SSH_OPTS=(
  -o ControlMaster=auto
  -o ControlPath="$SSH_SOCK"
  -o ControlPersist=300
  -o StrictHostKeyChecking=accept-new
  -o ServerAliveInterval=30
)
# Command prefixes used everywhere else: "${SSH_BASE[@]}" <remote command> and
# "${SCP_BASE[@]}" <src> <user@host:dst>. Note ssh takes -p but scp takes -P.
SSH_BASE=(ssh "${SSH_OPTS[@]}" -p "$REMOTE_PORT" "${REMOTE_USER}@${REMOTE_HOST}")
SCP_BASE=(scp "${SSH_OPTS[@]}" -P "$REMOTE_PORT")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# open_ssh_master
#   Establishes the shared SSH connection; this is the one place the password
#   prompt appears. Reads REMOTE_USER, REMOTE_HOST, REMOTE_PORT and SSH_BASE.
open_ssh_master() {
  local endpoint="${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_PORT}"

  printf '%s\n' \
    "==> Opening SSH master connection to ${endpoint}" \
    " (enter the server password when prompted; subsequent ssh/scp reuse this connection)"

  # Running a trivial remote command is enough to create the master. We do not
  # pass -f: the ControlPersist setting in SSH_OPTS keeps the connection around
  # for 300 s after this command returns.
  "${SSH_BASE[@]}" -o ConnectTimeout=20 'echo " master connection OK: $(hostname)"'
}
# close_ssh_master
#   Asks the SSH master to exit and removes the control socket together with the
#   temporary directory that holds it. Best-effort and safe to call repeatedly
#   (e.g. once explicitly and once more from an EXIT trap): every step tolerates
#   the master/socket/directory already being gone.
#   Reads SSH_BASE and SSH_SOCK.
close_ssh_master() {
  "${SSH_BASE[@]}" -O exit 2>/dev/null || true
  rm -f -- "$SSH_SOCK"
  # The socket's parent is a per-run temp directory; drop it too so runs do not
  # leave empty directories behind. rmdir only succeeds on an empty directory,
  # so a parent that holds anything else is never touched.
  rmdir -- "$(dirname -- "$SSH_SOCK")" 2>/dev/null || true
}
# remote_exists <remote_path> -> echoes "yes" if the path exists on the server,
# "no" otherwise (including when the ssh call itself fails). Always returns 0.
#   - The path is shell-quoted with printf %q, so names containing quotes or
#     spaces are tested correctly instead of breaking the remote command.
#   - ssh gets /dev/null as stdin so it can never swallow the input of a
#     `while read` loop this helper is called from.
remote_exists() {
  local p="$1"
  local quoted answer
  printf -v quoted '%q' "$p"
  answer=$("${SSH_BASE[@]}" "test -e ${quoted} && echo yes || echo no" 2>/dev/null </dev/null \
    | tr -d '[:space:]') || answer=""
  # Normalise: anything other than a clean "yes" counts as "no".
  if [ "$answer" = "yes" ]; then
    echo "yes"
  else
    echo "no"
  fi
}
# remote_size <remote_path> -> echoes the file size in bytes as reported by
# `stat -c%s` on the server; echoes 0 if the file is missing, the ssh call
# fails, or the reply is not a plain non-negative integer. Always returns 0.
#   - The path is shell-quoted with printf %q (quotes/spaces in names are safe).
#   - ssh gets /dev/null as stdin so it cannot eat a surrounding read loop's input.
#   - The reply is validated because callers feed it straight into `[ -eq ]`,
#     which errors out on non-numeric text.
remote_size() {
  local p="$1"
  local quoted bytes
  printf -v quoted '%q' "$p"
  bytes=$("${SSH_BASE[@]}" "stat -c%s ${quoted} 2>/dev/null || echo 0" 2>/dev/null </dev/null \
    | tr -d '[:space:]') || bytes=""
  case "$bytes" in
    '' | *[!0-9]*) bytes=0 ;;
  esac
  echo "$bytes"
}
# upload_file <local_path> <remote_path> <label>
#   Copies one file to the server unless an identical copy is already there.
#   local_path:  file on this machine; if it does not exist the upload is
#                skipped with a [SKIP] message and the function returns 0
#   remote_path: absolute destination path on the server
#   label:       short name used in progress messages
#
# "Identical" means same size AND same sha256. Size alone is not enough for the
# source files this is used for: an edit that keeps the byte count (e.g. one
# changed digit) would otherwise leave stale code on the server. If sha256sum is
# unavailable on either side the checksum comes back empty and the file is
# simply re-uploaded.
# Reads REMOTE_USER, REMOTE_HOST, SSH_BASE, SCP_BASE; uses remote_exists and
# remote_size.
upload_file() {
  local local_path="$1" remote_path="$2" label="$3"
  local lsize rsize lsum rsum q_path q_dir

  if [ ! -e "$local_path" ]; then
    echo " [SKIP] $label: local file missing ($local_path)"
    return 0
  fi

  lsize=$(stat -c%s "$local_path" 2>/dev/null || echo 0)
  rsize=$(remote_size "$remote_path")

  if [ "$(remote_exists "$remote_path")" = "yes" ] && [ "$rsize" -eq "$lsize" ]; then
    printf -v q_path '%q' "$remote_path"
    lsum=$(sha256sum -- "$local_path" 2>/dev/null | cut -d' ' -f1) || lsum=""
    rsum=$("${SSH_BASE[@]}" "sha256sum ${q_path} 2>/dev/null" 2>/dev/null </dev/null \
      | cut -d' ' -f1) || rsum=""
    if [ -n "$lsum" ] && [ "$lsum" = "$rsum" ]; then
      echo " [OK] $label: already present on server, skipping"
      return 0
    fi
    echo " [WARN] $label: same size but content differs (or checksum unavailable), re-uploading"
  elif [ "$rsize" -ne 0 ]; then
    # A non-empty remote file of a different size: stale or truncated copy.
    echo " [WARN] $label: remote size mismatch ($rsize vs $lsize), re-uploading"
  fi

  echo " [UP] $label: $local_path -> $remote_path"
  # %q keeps directory names with quotes/spaces intact in the remote command.
  printf -v q_dir '%q' "$(dirname -- "$remote_path")"
  "${SSH_BASE[@]}" "mkdir -p ${q_dir}" </dev/null
  "${SCP_BASE[@]}" "$local_path" "${REMOTE_USER}@${REMOTE_HOST}:${remote_path}"
}
# upload_hf_repo <repo_subpath> <label>
#   repo_subpath: directory name of the repo inside the HF hub cache, e.g.
#                 models--owl10--UniDriveVLA_Nusc_Base_Stage1 or
#                 datasets--OpenDriveLab--DriveLM
#   label:        human-readable name used in progress messages
#
# Syncs the HF hub cache layout for one repo from LOCAL_HF_HUB to REMOTE_HF_HUB,
# recreating blobs/ + snapshots/<rev>/ + refs/main so the offline HF cache loads
# correctly. Idempotent: a blob already present on the remote with the same size
# is skipped, so a 4 GB weight is uploaded only once.
#
# We sync blob-by-blob (resolving each local snapshots/<rev>/* symlink to its
# blob) and then rebuild the snapshot symlinks on the remote. This avoids the
# scp -r-on-a-dir-of-symlinks pitfall where the symlinks copy as themselves
# (dangling on the remote) instead of their target files.
#
# Returns 0 (after a [SKIP] message) when the repo is not in the local cache.
# Reads LOCAL_HF_HUB, REMOTE_HF_HUB, REMOTE_USER, REMOTE_HOST, SSH_BASE,
# SCP_BASE; uses remote_exists and remote_size.
upload_hf_repo() {
  local repo_subpath="$1" label="$2"
  local local_repo="${LOCAL_HF_HUB}/${repo_subpath}"
  local remote_repo="${REMOTE_HF_HUB}/${repo_subpath}"
  local scp_target="${REMOTE_USER}@${REMOTE_HOST}"
  local non_slash='[!/]'
  local snap_dir snap_rev remote_snap entry rel_path parent_dir
  local blob_path blob_name remote_blob remote_entry
  local lsize rsize human slashes depth_up q_a q_b
  local remote_ref local_ref_val remote_ref_val

  if [ ! -d "$local_repo" ]; then
    echo " [SKIP] $label: local repo missing ($local_repo)"
    return 0
  fi

  # Remote paths are quoted with printf %q before being spliced into a command.
  printf -v q_a '%q' "$remote_repo"
  "${SSH_BASE[@]}" "mkdir -p ${q_a}/blobs ${q_a}/snapshots ${q_a}/refs" </dev/null

  # 1. Walk every entry under snapshots/<rev>/, upload the blob it points to
  #    (idempotent) and recreate the symlink/file remotely, preserving the
  #    relative directory structure.
  for snap_dir in "${local_repo}/snapshots"/*/; do
    [ -d "$snap_dir" ] || continue
    snap_dir="${snap_dir%/}"
    snap_rev="${snap_dir##*/}"
    remote_snap="${remote_repo}/snapshots/${snap_rev}"
    printf -v q_a '%q' "$remote_snap"
    "${SSH_BASE[@]}" "mkdir -p ${q_a}" </dev/null

    # The find output is read on fd 3, NOT on stdin: ssh forwards its stdin to
    # the remote side, so with a plain `done < <(find ...)` the first ssh call in
    # the body would swallow the rest of the list and end the loop early.
    while IFS= read -r -d '' -u 3 entry; do
      rel_path="${entry#"${snap_dir}"/}"
      [ -n "$rel_path" ] || continue
      remote_entry="${remote_snap}/${rel_path}"
      parent_dir="$(dirname -- "$rel_path")"

      # Ensure the parent directory exists on the remote.
      if [ "$parent_dir" != "." ]; then
        printf -v q_a '%q' "${remote_snap}/${parent_dir}"
        "${SSH_BASE[@]}" "mkdir -p ${q_a}" </dev/null
      fi

      # Resolve a symlink to the real blob file (empty if it cannot be resolved).
      blob_path="$(readlink -f -- "$entry")" || blob_path=""

      if [ -L "$entry" ] && [ -n "$blob_path" ] && [ -f "$blob_path" ]; then
        blob_name="${blob_path##*/}"
        remote_blob="${remote_repo}/blobs/${blob_name}"
        lsize=$(stat -c%s "$blob_path" 2>/dev/null || echo 0)
        rsize=$(remote_size "$remote_blob")
        if [ "$(remote_exists "$remote_blob")" != "yes" ] || [ "$rsize" -ne "$lsize" ]; then
          if [ "$rsize" -ne 0 ]; then
            echo " [WARN] $label blob ${blob_name}: remote size mismatch ($rsize vs $lsize), re-uploading"
          fi
          human=$(du -h "$blob_path" | cut -f1)
          echo " [UP] $label blob ${blob_name} (${human})"
          "${SCP_BASE[@]}" "$blob_path" "${scp_target}:${remote_blob}"
        fi

        # Recreate the snapshot symlink relative to its depth. A link directly
        # in snapshots/<rev>/ needs "../../" to reach the repo root; every
        # directory level in rel_path (one per "/") needs one more "../".
        slashes="${rel_path//$non_slash/}"
        depth_up="../../${slashes//\//../}"
        printf -v q_a '%q' "${depth_up}blobs/${blob_name}"
        printf -v q_b '%q' "$remote_entry"
        "${SSH_BASE[@]}" "ln -sfn ${q_a} ${q_b}" </dev/null
      elif [ -f "$entry" ]; then
        # Real file (not a symlink) — upload directly into the snapshot.
        lsize=$(stat -c%s "$entry" 2>/dev/null || echo 0)
        rsize=$(remote_size "$remote_entry")
        if [ "$(remote_exists "$remote_entry")" != "yes" ] || [ "$rsize" -ne "$lsize" ]; then
          if [ "$rsize" -ne 0 ]; then
            echo " [WARN] $label ${rel_path}: remote size mismatch ($rsize vs $lsize), re-uploading"
          fi
          echo " [UP] $label ${rel_path}"
          "${SCP_BASE[@]}" "$entry" "${scp_target}:${remote_entry}"
        fi
      fi
      # Directories and unresolvable links fall through: nothing to upload.
    done 3< <(find "$snap_dir" -mindepth 1 -print0)
  done

  # 2. refs/main (commit hash). Compared by content, not mere presence: if the
  #    local cache moved to a newer revision, a stale remote ref would keep
  #    resolving "main" to the old snapshot.
  if [ -f "${local_repo}/refs/main" ]; then
    remote_ref="${remote_repo}/refs/main"
    local_ref_val="$(tr -d '[:space:]' < "${local_repo}/refs/main")"
    printf -v q_a '%q' "$remote_ref"
    remote_ref_val="$("${SSH_BASE[@]}" "cat ${q_a} 2>/dev/null || true" 2>/dev/null </dev/null \
      | tr -d '[:space:]')" || remote_ref_val=""
    if [ -n "$remote_ref_val" ] && [ "$remote_ref_val" = "$local_ref_val" ]; then
      echo " [OK] $label refs/main: already present"
    else
      echo " [UP] $label refs/main"
      "${SCP_BASE[@]}" "${local_repo}/refs/main" "${scp_target}:${remote_ref}"
    fi
  fi

  echo " [DONE] $label synced"
}
# ---------------------------------------------------------------------------
# Locate local resources
# ---------------------------------------------------------------------------

# first_snapshot_dir <snapshots_dir>
#   Prints the first sub-directory (glob order, i.e. sorted by name) of
#   <snapshots_dir> without a trailing slash, or nothing if there is none.
#   Always returns 0: under `set -e -o pipefail` the former
#   `ls -d ... | head -1` aborted the whole script on "no match", before the
#   explanatory ERROR messages below could be printed.
first_snapshot_dir() {
  local candidate
  for candidate in "$1"/*/; do
    if [ -d "$candidate" ]; then
      printf '%s\n' "${candidate%/}"
      break
    fi
  done
  return 0
}

# Local HF hub cache. Honour HF_HUB_CACHE / HF_HOME when set, since the HF
# download tools used to populate the cache follow them too; otherwise fall back
# to the default ~/.cache/huggingface/hub.
LOCAL_HF_HUB="${HF_HUB_CACHE:-${HF_HOME:-${HOME}/.cache/huggingface}/hub}"
LOCAL_MODEL_SNAP="${LOCAL_HF_HUB}/models--owl10--UniDriveVLA_Nusc_Base_Stage1/snapshots"
LOCAL_MODEL_SNAP_DIR="$(first_snapshot_dir "$LOCAL_MODEL_SNAP")"
LOCAL_DS_SNAP="${LOCAL_HF_HUB}/datasets--OpenDriveLab--DriveLM/snapshots"
LOCAL_DS_SNAP_DIR="$(first_snapshot_dir "$LOCAL_DS_SNAP")"
# Directory containing this script; the code files to upload sit next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "========================================="
echo " remote_sync.sh — UniDriveVLA NVFP4 sync"
echo "========================================="
echo " server: ${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_PORT}"
echo " project dir: ${REMOTE_PROJECT}"
echo " remote HF hub: ${REMOTE_HF_HUB}"
echo " remote nuscenes: ${REMOTE_NUSCENES}"
echo
echo "Local resources:"
echo " model snapshot: ${LOCAL_MODEL_SNAP_DIR:-(NOT FOUND)}"
echo " dataset snapshot: ${LOCAL_DS_SNAP_DIR:-(NOT FOUND)}"
echo " code dir: ${SCRIPT_DIR}"
echo

# Both caches are prerequisites; fail early with a hint on how to create them.
if [ -z "$LOCAL_MODEL_SNAP_DIR" ]; then
  {
    echo "ERROR: local model snapshot not found under ${LOCAL_MODEL_SNAP}"
    echo " Run the quantization script locally first so HF caches the model, or"
    echo " huggingface-cli download owl10/UniDriveVLA_Nusc_Base_Stage1"
  } >&2
  exit 1
fi
if [ -z "$LOCAL_DS_SNAP_DIR" ]; then
  {
    echo "ERROR: local DriveLM dataset snapshot not found under ${LOCAL_DS_SNAP}"
    echo " Run huggingface-cli download OpenDriveLab/DriveLM --repo-type dataset"
  } >&2
  exit 1
fi
# ---------------------------------------------------------------------------
# 1. Open master SSH connection (single password prompt)
# ---------------------------------------------------------------------------
# Tear the master connection down on EVERY exit path. Under `set -e` any failing
# step below ends the script immediately; without this trap the master would
# linger for the ControlPersist window and its socket would be left behind.
# close_ssh_master is safe to run more than once, so the explicit call at the
# end of the script is unaffected.
trap close_ssh_master EXIT
open_ssh_master

# Ensure remote base dirs exist.
echo
echo "==> Creating remote directories"
"${SSH_BASE[@]}" "mkdir -p '${REMOTE_PROJECT}' '${REMOTE_HF_HUB}'"

# ---------------------------------------------------------------------------
# 2. Upload HF model cache (bf16 weights, ~4 GB)
# ---------------------------------------------------------------------------
echo
echo "==> [1/4] HF model cache: owl10/UniDriveVLA_Nusc_Base_Stage1"
upload_hf_repo "models--owl10--UniDriveVLA_Nusc_Base_Stage1" "model weights"

# ---------------------------------------------------------------------------
# 2a. Upload sentence-transformers fallback model (~20 MB)
# ---------------------------------------------------------------------------
echo
echo "==> [1a/4] HF model cache: sentence-transformers/all-MiniLM-L6-v2"
echo " (used by evaluation.py when --gpt_eval is set but no OpenAI key is available)"
upload_hf_repo "models--sentence-transformers--all-MiniLM-L6-v2" "semantic similarity model"

# ---------------------------------------------------------------------------
# 3. Upload DriveLM dataset cache (~185 MB JSON + ~4.2 GB image zips)
# ---------------------------------------------------------------------------
echo
echo "==> [2/4] HF dataset cache: OpenDriveLab/DriveLM (JSON + image zips)"
# upload_hf_repo syncs every file currently in the local dataset snapshot. That
# includes the image zips only if they are ALREADY in the local cache; zips that
# still have to be downloaded are fetched and uploaded by the image step that
# follows, which also unzips them into REMOTE_NUSCENES.
upload_hf_repo "datasets--OpenDriveLab--DriveLM" "DriveLM dataset"
# ---------------------------------------------------------------------------
# 3b. DriveLM nuScenes images (download zips if missing, sync, unzip on server)
# ---------------------------------------------------------------------------
echo
echo "==> [2b/4] DriveLM nuScenes images (zips -> unzip on server)"

# Paths shared by the upload loop and the remote unzip step.
DS_SNAP_REV="${LOCAL_DS_SNAP_DIR##*/}"
LOCAL_DS_REPO="${LOCAL_HF_HUB}/datasets--OpenDriveLab--DriveLM"
REMOTE_DS_REPO="${REMOTE_HF_HUB}/datasets--OpenDriveLab--DriveLM"
REMOTE_DS_SNAP="${REMOTE_DS_REPO}/snapshots/${DS_SNAP_REV}"

# The blob and snapshot directories must exist before we scp / symlink into them.
"${SSH_BASE[@]}" "mkdir -p '${REMOTE_DS_REPO}/blobs' '${REMOTE_DS_SNAP}'"

# 3b-i. Ensure each image zip exists in the LOCAL HF cache, then upload its blob.
#       hf download is idempotent/resumable: completed files are skipped on re-run.
for zip in "${DRIVELM_IMG_ZIPS[@]}"; do
  "${SSH_BASE[@]}" "true" >/dev/null 2>&1 || true # keep master warm

  # The zip as HF caches it: a snapshot symlink pointing at a blob.
  local_zip="${LOCAL_DS_REPO}/snapshots/${DS_SNAP_REV}/${zip}"
  if [ ! -e "$local_zip" ]; then
    echo " [DL] ${zip}: not in local cache, downloading from ${DRIVELM_DATASET_REPO} ..."
    if ! hf download "${DRIVELM_DATASET_REPO}" "$zip" --repo-type dataset \
      >/tmp/drivelm_dl.log 2>&1; then
      echo " [ERR] download of ${zip} failed; see /tmp/drivelm_dl.log"
      tail -5 /tmp/drivelm_dl.log
      continue
    fi
  fi

  # Resolve symlink to the real blob (upload the real file).
  local_blob="$(readlink -f -- "$local_zip")" || local_blob=""
  if [ -z "$local_blob" ] || [ ! -f "$local_blob" ]; then
    echo " [ERR] ${zip}: local blob not found after download attempt"
    continue
  fi

  blob_name="${local_blob##*/}"
  remote_blob="${REMOTE_DS_REPO}/blobs/${blob_name}"

  # Idempotent: skip only if the blob is on the server AND has the same size.
  # Existence alone would accept a truncated blob left by an interrupted scp,
  # which then fails (or silently under-delivers) at unzip time.
  local_bytes=$(stat -c%s "$local_blob" 2>/dev/null || echo 0)
  remote_bytes=$(remote_size "$remote_blob")
  if [ "$(remote_exists "$remote_blob")" = "yes" ] && [ "$remote_bytes" -eq "$local_bytes" ]; then
    echo " [OK] ${zip}: blob already on server, skipping upload"
  else
    if [ "$remote_bytes" -ne 0 ]; then
      echo " [WARN] ${zip}: remote size mismatch ($remote_bytes vs $local_bytes), re-uploading"
    fi
    sz=$(du -h "$local_blob" | cut -f1)
    echo " [UP] ${zip} blob ${blob_name} (${sz})"
    "${SCP_BASE[@]}" "$local_blob" "${REMOTE_USER}@${REMOTE_HOST}:${remote_blob}"
  fi

  # Recreate the dataset snapshot symlink so the zip is visible under snapshots/.
  "${SSH_BASE[@]}" "ln -sf '../../blobs/${blob_name}' '${REMOTE_DS_SNAP}/${zip}'"
done

# 3b-ii. Unzip the images on the server into REMOTE_NUSCENES (idempotent: skip if
#        samples/CAM_FRONT is already populated, so re-runs don't re-unzip ~4 GB).
#        The remote script is sent on stdin to `bash -s`; its inputs travel as
#        %q-quoted positional arguments, which keeps the script itself free of
#        nested escaping:  $1 = nuscenes dir, $2 = remote snapshot dir, rest = zips.
echo " Unzipping images into ${REMOTE_NUSCENES} ..."
printf -v remote_unzip_args '%q ' "$REMOTE_NUSCENES" "$REMOTE_DS_SNAP" "${DRIVELM_IMG_ZIPS[@]}"
"${SSH_BASE[@]}" "bash -s -- ${remote_unzip_args}" <<'REMOTE_UNZIP'
set -e
NS=$1
snap_dir=$2
shift 2
tmp=""
# Never leave a multi-GB scratch directory behind if unzip fails mid-way.
trap '[ -z "$tmp" ] || rm -rf "$tmp"' EXIT
mkdir -p "$NS"
# Idempotency: if samples/CAM_FRONT already populated, skip the unzip.
if [ -d "$NS/samples/CAM_FRONT" ] && [ "$(ls -A "$NS/samples/CAM_FRONT" 2>/dev/null | head -1)" ]; then
  echo " [OK] images already unzipped under $NS/samples, skipping"
else
  for zip in "$@"; do
    zpath="$snap_dir/$zip"
    if [ -e "$zpath" ]; then
      echo " [UNZ] $zip"
      # Unzip into a scratch dir next to the destination (same large
      # filesystem, not /tmp), then merge the samples tree into place.
      tmp=$(mktemp -d "$NS/.unzip.XXXXXX")
      unzip -q "$zpath" -d "$tmp"
      # The zip may contain top-level 'nuscenes/samples/...' or 'samples/...';
      # normalize: find a samples/ dir and copy its contents under $NS/samples.
      src_samples=$(find "$tmp" -maxdepth 3 -type d -name samples | head -1)
      if [ -n "$src_samples" ]; then
        mkdir -p "$NS/samples"
        # -n: never overwrite images already merged from a previous zip. Some
        # cp versions report skipped files as an error, so warn instead of abort.
        cp -rn "$src_samples"/. "$NS/samples"/ \
          || echo " [WARN] $zip: cp reported errors while merging into $NS/samples" >&2
      else
        echo " [WARN] $zip: no samples/ directory found inside the archive" >&2
      fi
      rm -rf "$tmp"
      tmp=""
    else
      echo " [SKIP] $zip not found on server"
    fi
  done
  echo ' [DONE] images unzipped'
fi
echo ' samples tree:'; find "$NS/samples" -maxdepth 1 -type d 2>/dev/null | head
REMOTE_UNZIP
# ---------------------------------------------------------------------------
# 4. Upload code
# ---------------------------------------------------------------------------
printf '\n%s\n' "==> [3/4] Code: unidrive_vla_nusc_base_*.py"
# Both scripts share the unidrive_vla_nusc_base_ prefix locally and remotely;
# the label shown in the progress output is the short name.
for code_part in quantize_all evaluation; do
  code_name="unidrive_vla_nusc_base_${code_part}.py"
  upload_file "${SCRIPT_DIR}/${code_name}" "${REMOTE_PROJECT}/${code_name}" "${code_part}.py"
done

# ---------------------------------------------------------------------------
# 5. Upload prepare_env.sh (run on server afterwards)
# ---------------------------------------------------------------------------
printf '\n%s\n' "==> [4/4] Env script: prepare_env.sh"
env_script="prepare_env.sh"
upload_file "${SCRIPT_DIR}/${env_script}" "${REMOTE_PROJECT}/${env_script}" "${env_script}"
# ---------------------------------------------------------------------------
# Summary + offer to run prepare_env.sh on the server
# ---------------------------------------------------------------------------
echo
echo "========================================="
echo " Sync summary"
echo "========================================="
# Purely informational listing of what is now on the server. Its exit status is
# that of the last remote command (du), which is non-zero if any listed path is
# absent — that must not abort the run under `set -e` after a successful sync.
"${SSH_BASE[@]}" "
echo ' remote project dir (code):'; ls -la '${REMOTE_PROJECT}' | grep -E 'unidrive|prepare_env' || true
echo
echo ' remote HF model cache:'; ls -la '${REMOTE_HF_HUB}/models--owl10--UniDriveVLA_Nusc_Base_Stage1/snapshots/'*/ 2>/dev/null | head -20
echo
echo ' remote DriveLM cache:'; ls -la '${REMOTE_HF_HUB}/datasets--OpenDriveLab--DriveLM/snapshots/'*/ 2>/dev/null
echo
echo ' remote nuscenes samples:'; find '${REMOTE_NUSCENES}/samples' -maxdepth 1 -type d 2>/dev/null | head
echo
echo ' sizes:'; du -sh '${REMOTE_HF_HUB}/models--owl10--UniDriveVLA_Nusc_Base_Stage1' '${REMOTE_HF_HUB}/datasets--OpenDriveLab--DriveLM' '${REMOTE_NUSCENES}' 2>/dev/null
" || echo " [WARN] remote summary reported an error (sync itself completed)" >&2

# Optional: run prepare_env.sh on the server to install dependencies.
if [ "${SKIP_PREPARE:-0}" = "1" ]; then
  echo
  echo "SKIP_PREPARE=1 set — not running prepare_env.sh on the server."
  echo "Run it manually: ssh -p ${REMOTE_PORT} ${REMOTE_USER}@${REMOTE_HOST} 'bash ${REMOTE_PROJECT}/prepare_env.sh'"
else
  echo
  echo "========================================="
  echo " Next: run prepare_env.sh on the server?"
  echo "========================================="
  echo "This installs dependencies via the Tencent cloud pip mirror (transformers 5.x,"
  echo "nvidia-modelopt 0.44, nltk/rouge/pycocoevalcap/sentence-transformers/qwen-vl-utils, etc.)."
  printf "Run prepare_env.sh on the server now? [y/N] "
  # read returns non-zero at EOF (stdin closed / non-interactive run); treat
  # that as the default answer "N" instead of letting `set -e` kill the script.
  ANSWER=""
  read -r ANSWER || ANSWER=""
  case "$ANSWER" in
    y | Y)
      echo
      echo "==> Running prepare_env.sh on the server ..."
      "${SSH_BASE[@]}" "bash '${REMOTE_PROJECT}/prepare_env.sh'"
      ;;
    *)
      echo " Skipped. Run it manually later:"
      echo " ssh -p ${REMOTE_PORT} ${REMOTE_USER}@${REMOTE_HOST} 'bash ${REMOTE_PROJECT}/prepare_env.sh'"
      ;;
  esac
fi
# All transfers are done: drop the shared SSH connection.
close_ssh_master

# Closing banner followed by the commands to run on the server.
banner_rule="========================================="
printf '\n%s\n%s\n%s\n' \
  "$banner_rule" \
  " Done. To run NVFP4 quantization on the server:" \
  "$banner_rule"
# Unquoted here-doc delimiter: the ${...} values are filled in when printed.
# The trailing backslash is a line continuation in this kind of here-doc, so the
# two python3 lines are printed as one line.
cat <<EOF
ssh -p ${REMOTE_PORT} ${REMOTE_USER}@${REMOTE_HOST}
cd ${REMOTE_PROJECT}
export HF_HOME=${REMOTE_HF_HOME}
export HF_HUB_OFFLINE=1
# --nuscenes_dir tells load_drivelm_dataset where the unzipped multi-view
# images are, so calibration/evaluation see real images (not text-only).
python3 unidrive_vla_nusc_base_quantize_all.py --mode nvfp4 --skip_baseline_eval \
--nuscenes_dir ${REMOTE_NUSCENES}
EOF