Upload folder using huggingface_hub
#3
by kimi000 - opened
This view is limited to 50 files because it contains too many changes. See the raw diff here.
- .gitattributes +170 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/README.md +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.json +0 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.jsonl +0 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000001.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000002.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000003.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000004.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000005.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000006.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000007.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000008.json +22 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000009.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000010.json +16 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000001.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000002.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000003.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000004.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000005.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000006.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000007.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000008.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000009.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000010.json +4 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000001.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000002.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000003.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000004.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000005.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000006.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000007.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000008.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000009.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000010.json +14 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000001.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000002.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000003.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000004.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000005.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000006.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000007.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000008.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000009.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000010.json +18 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000001.json +164 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000002.json +716 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000003.json +164 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000004.json +256 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000005.json +302 -0
- samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000006.json +394 -0
.gitattributes
CHANGED
|
@@ -52,3 +52,173 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 52 |
10samples/sample_0007/overlays/overlay_accepted.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
10samples/sample_0007/overlays/overlay_intended.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
10samples/sample_0007/overlays/overlay_measured.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
10samples/sample_0007/overlays/overlay_accepted.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
10samples/sample_0007/overlays/overlay_intended.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
10samples/sample_0007/overlays/overlay_measured.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_city_buildings.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_street_light.png filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_street_trees.png filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_city_buildings.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_street_light.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_street_trees.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_city_buildings.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_drainage_grate.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_parked_car_left.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_parked_suv_right.png filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_pedestrian_walking.png filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_shop_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_storefront_sign.png filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_street_signs.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_street_trees.png filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_traveling_dark_suv.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_white_car.png filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_yellow_lines.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_black_sedan.png filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_black_sedan.png filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_black_sedan.png filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/detect_refine_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/detect_refine_street_lines.png filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/diversify_input_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/diversify_input_street_lines.png filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_delivery_truck.png filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_street_lines.png filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_traffic_light.png filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_walker.png filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/detect_refine_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/detect_refine_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_uniformed_officer.png filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_bystander_in_suit.png filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_firefighter.png filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_silver_car.png filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_traffic_light.png filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_uniformed_officer.png filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/detect_refine_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/detect_refine_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_white_panel_van.png filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_awning_building_corner.png filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_distant_pedestrian.png filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_emergency_vehicle.png filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_green_street_sign.png filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_vertical_illuminated_sign.png filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_white_panel_van.png filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_brick_building_right.png filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_crosswalk_markings.png filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_street_light_pole.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_street_light_pole.png filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_brick_building_right.png filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_bunch_of_balloons.png filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_crosswalk_markings.png filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dark_car_1.png filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dark_car_2.png filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dashboard.png filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_white_garbage_bag.png filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_white_sedan.png filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_woman_in_dark_dress.png filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_street_lamp.png filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_sign_holder.png filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_street_lamp.png filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_woman_waiting.png filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_young_man_waiting.png filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_businessman_waiting.png filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_crossing_right.png filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_standing.png filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_walking_away_sidewalk.png filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_sign_holder.png filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_street_lamp.png filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_woman_waiting.png filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_young_man_waiting.png filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_metal_railing.png filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_overpass.png filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_metal_railing.png filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_overpass.png filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_street_light.png filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_metal_railing.png filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_overpass.png filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_pedestrian_in_suit.png filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_black_suv.png filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_black_suv.png filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_man_in_pink_shirt.png filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_with_backpack.png filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/main_image.png filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_black_suv.png filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_man_in_pink_shirt.png filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_light_jacket.png filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_red.png filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_with_backpack.png filter=lfs diff=lfs merge=lfs -text
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/README.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# samples_v8
|
| 2 |
+
|
| 3 |
+
Generated with `data_recipe_v8.md`: vocabulary-first planning, adaptive canvas selection, structured JSON compose prompts, no identity verification, no gate, SAM white-background reference postprocessing, and strict reference-completeness verification with regenerate-until-pass behavior.
|
| 4 |
+
|
| 5 |
+
- chat model: `gcp/google/gemini-3.1-pro-preview`
|
| 6 |
+
- image model: `gcp/google/gemini-3-pro-image-preview`
|
| 7 |
+
- people references: `white_bg_full_body_front`
|
| 8 |
+
- non-person references: `white_bg_encyclopedia_photo`
|
| 9 |
+
- SAM postprocess: every generated reference is segmented with `sam_vit_b` and pasted onto pure `#ffffff` background
|
| 10 |
+
- reference verify max attempts per subject: `10`
|
| 11 |
+
- allowed canvases: `[{"aspect_ratio": "1:1", "size": [1024, 1024], "style": "photorealistic"}, {"aspect_ratio": "4:3", "size": [1152, 864], "style": "photorealistic"}, {"aspect_ratio": "3:4", "size": [864, 1152], "style": "photorealistic"}, {"aspect_ratio": "3:2", "size": [1248, 832], "style": "photorealistic"}, {"aspect_ratio": "2:3", "size": [832, 1248], "style": "photorealistic"}, {"aspect_ratio": "16:9", "size": [1280, 720], "style": "photorealistic"}, {"aspect_ratio": "9:16", "size": [720, 1280], "style": "photorealistic"}]`
|
| 12 |
+
- scenario mode: `driving`
|
| 13 |
+
- pools: `vocab_task_pool`, `plan_pool`, `scene_pool`, `detection_pool`, `reference_pool`
|
| 14 |
+
- bbox overlay: `bbox_overlay.png` draws every planned subject bbox; a sample is rejected and regenerated if any planned subject is still missing after VLM detection retries
|
| 15 |
+
- detection max attempts per subject: `3`
|
| 16 |
+
- launch args: `{"compose_workers": 3, "detect_max_attempts": 3, "detect_workers": 3, "emit_workers": 4, "idle_sleep": 1.0, "image_inflight": 32, "image_interval": 0.05, "image_max_retries": 8, "max_retries": 3, "no_topup": false, "plan_workers": 6, "ref_verify_max_attempts": 10, "reference_workers": 6, "requeue_in_progress": true, "seed": 1781927993, "status_interval": 30.0, "subject_detect_workers": 24, "target_samples": 10}`
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000001.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000001",
|
| 3 |
+
"plan_path": "sample_000001/plan.json",
|
| 4 |
+
"task_path": "sample_000001/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000001/main_image.png",
|
| 6 |
+
"detections": "sample_000001/detections.json",
|
| 7 |
+
"n_detected": 3,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000001",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000002.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000002",
|
| 3 |
+
"plan_path": "sample_000002/plan.json",
|
| 4 |
+
"task_path": "sample_000002/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000002/main_image.png",
|
| 6 |
+
"detections": "sample_000002/detections.json",
|
| 7 |
+
"n_detected": 15,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000002",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000003.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000003",
|
| 3 |
+
"plan_path": "sample_000003/plan.json",
|
| 4 |
+
"task_path": "sample_000003/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000003/main_image.png",
|
| 6 |
+
"detections": "sample_000003/detections.json",
|
| 7 |
+
"n_detected": 3,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000003",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000004.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000004",
|
| 3 |
+
"plan_path": "sample_000004/plan.json",
|
| 4 |
+
"task_path": "sample_000004/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000004/main_image.png",
|
| 6 |
+
"detections": "sample_000004/detections.json",
|
| 7 |
+
"n_detected": 5,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000004",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000005.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000005",
|
| 3 |
+
"plan_path": "sample_000005/plan.json",
|
| 4 |
+
"task_path": "sample_000005/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000005/main_image.png",
|
| 6 |
+
"detections": "sample_000005/detections.json",
|
| 7 |
+
"n_detected": 6,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000005",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000006.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000006",
|
| 3 |
+
"plan_path": "sample_000006/plan.json",
|
| 4 |
+
"task_path": "sample_000006/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000006/main_image.png",
|
| 6 |
+
"detections": "sample_000006/detections.json",
|
| 7 |
+
"n_detected": 8,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000006",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000007.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000007",
|
| 3 |
+
"plan_path": "sample_000007/plan.json",
|
| 4 |
+
"task_path": "sample_000007/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000007/main_image.png",
|
| 6 |
+
"detections": "sample_000007/detections.json",
|
| 7 |
+
"n_detected": 14,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000007",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000008.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000008",
|
| 3 |
+
"plan_path": "sample_000008/plan.json",
|
| 4 |
+
"task_path": "sample_000008/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000008/main_image.png",
|
| 6 |
+
"detections": "sample_000008/detections.json",
|
| 7 |
+
"n_detected": 10,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000008",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 1,
|
| 15 |
+
"errors": [
|
| 16 |
+
{
|
| 17 |
+
"time": 1782293479.3988369,
|
| 18 |
+
"error": "RuntimeError: reference generation or verification failed for street_lamp: RuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)",
|
| 19 |
+
"traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 866, in generate_references\n references.append(helpers.diversify_subject(get_client(), sdir, main_image, subject, detection))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/v8_helpers.py\", line 693, in diversify_subject\n raise RuntimeError(\nRuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1113, in handler\n references, reference_errors = generate_references(sample_id, plan, detections)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 871, in generate_references\n raise RuntimeError(f\"reference generation or verification failed for {name}: {errors[name]}\") from exc\nRuntimeError: reference generation or verification failed for street_lamp: RuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)\n"
|
| 20 |
+
}
|
| 21 |
+
]
|
| 22 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000009.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000009",
|
| 3 |
+
"plan_path": "sample_000009/plan.json",
|
| 4 |
+
"task_path": "sample_000009/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000009/main_image.png",
|
| 6 |
+
"detections": "sample_000009/detections.json",
|
| 7 |
+
"n_detected": 5,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000009",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000010.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000010",
|
| 3 |
+
"plan_path": "sample_000010/plan.json",
|
| 4 |
+
"task_path": "sample_000010/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000010/main_image.png",
|
| 6 |
+
"detections": "sample_000010/detections.json",
|
| 7 |
+
"n_detected": 9,
|
| 8 |
+
"model_ids": {
|
| 9 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 10 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 11 |
+
},
|
| 12 |
+
"item_id": "sample_000010",
|
| 13 |
+
"pool": "detection_pool",
|
| 14 |
+
"retry_count": 0,
|
| 15 |
+
"errors": []
|
| 16 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000001.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000001",
|
| 3 |
+
"row": "sample_000001/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000002.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000002",
|
| 3 |
+
"row": "sample_000002/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000003.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000003",
|
| 3 |
+
"row": "sample_000003/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000004.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000004",
|
| 3 |
+
"row": "sample_000004/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000005.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000005",
|
| 3 |
+
"row": "sample_000005/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000006.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000006",
|
| 3 |
+
"row": "sample_000006/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000007.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000007",
|
| 3 |
+
"row": "sample_000007/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000008.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000008",
|
| 3 |
+
"row": "sample_000008/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000009.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000009",
|
| 3 |
+
"row": "sample_000009/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000010.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000010",
|
| 3 |
+
"row": "sample_000010/row.json"
|
| 4 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000001.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000001",
|
| 3 |
+
"plan_path": "sample_000001/plan.json",
|
| 4 |
+
"task_path": "sample_000001/vocab_task.json",
|
| 5 |
+
"prompt_hash": "f859db774945f5b6373454badaca82233955ab8ca8b947dbecdab4a6975bb976",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000001",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000002.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000002",
|
| 3 |
+
"plan_path": "sample_000002/plan.json",
|
| 4 |
+
"task_path": "sample_000002/vocab_task.json",
|
| 5 |
+
"prompt_hash": "0e4f0ac71145112a7a163e8d8798062138922bb63e746f904a784c8608c2d011",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000002",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000003.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000003",
|
| 3 |
+
"plan_path": "sample_000003/plan.json",
|
| 4 |
+
"task_path": "sample_000003/vocab_task.json",
|
| 5 |
+
"prompt_hash": "325b286ae7c454ccf5a9081711812c3439116af9f5e2b39f2029ac6c415ac47e",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000003",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000004.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000004",
|
| 3 |
+
"plan_path": "sample_000004/plan.json",
|
| 4 |
+
"task_path": "sample_000004/vocab_task.json",
|
| 5 |
+
"prompt_hash": "9d98080d776626e886894383e176082ba23f7c536f920f60fd7462fb1e21e246",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000004",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000005.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000005",
|
| 3 |
+
"plan_path": "sample_000005/plan.json",
|
| 4 |
+
"task_path": "sample_000005/vocab_task.json",
|
| 5 |
+
"prompt_hash": "dd17f99d7265de9375dbd703b97e688634f8632997dd81e3c11f787d9cb95361",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000005",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000006.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000006",
|
| 3 |
+
"plan_path": "sample_000006/plan.json",
|
| 4 |
+
"task_path": "sample_000006/vocab_task.json",
|
| 5 |
+
"prompt_hash": "98e81e2eedabb51a9bd1b9a35847125d84c5ef899ddbf672c226fe62b69506b1",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000006",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000007.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000007",
|
| 3 |
+
"plan_path": "sample_000007/plan.json",
|
| 4 |
+
"task_path": "sample_000007/vocab_task.json",
|
| 5 |
+
"prompt_hash": "a11d77c30b9e5c0a4d946297ad0615803424cd56063dca0351a861c7c079059b",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000007",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000008.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000008",
|
| 3 |
+
"plan_path": "sample_000008/plan.json",
|
| 4 |
+
"task_path": "sample_000008/vocab_task.json",
|
| 5 |
+
"prompt_hash": "fd36686545adb806e34c1b62a7004d06a56e505d47d056ee5b0b89752f3d75bd",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000008",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000009.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000009",
|
| 3 |
+
"plan_path": "sample_000009/plan.json",
|
| 4 |
+
"task_path": "sample_000009/vocab_task.json",
|
| 5 |
+
"prompt_hash": "bddb7951d32d26de33b4ea1a6aa00cdbd7da12d9349508f23b30d7e3e1eadef8",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000009",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000010.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000010",
|
| 3 |
+
"plan_path": "sample_000010/plan.json",
|
| 4 |
+
"task_path": "sample_000010/vocab_task.json",
|
| 5 |
+
"prompt_hash": "cc6385c75e6510ab98831d1b4008bb6fc0bdaf521d9d020a82a36d3a08b9d8d8",
|
| 6 |
+
"model_ids": {
|
| 7 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 8 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 9 |
+
},
|
| 10 |
+
"item_id": "sample_000010",
|
| 11 |
+
"pool": "plan_pool",
|
| 12 |
+
"retry_count": 0,
|
| 13 |
+
"errors": []
|
| 14 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000001.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000001",
|
| 3 |
+
"plan_path": "sample_000001/plan.json",
|
| 4 |
+
"task_path": "sample_000001/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000001/main_image.png",
|
| 6 |
+
"detections": "sample_000001/detections.json",
|
| 7 |
+
"references": "sample_000001/references.json",
|
| 8 |
+
"n_references": 3,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000001",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000002.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000002",
|
| 3 |
+
"plan_path": "sample_000002/plan.json",
|
| 4 |
+
"task_path": "sample_000002/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000002/main_image.png",
|
| 6 |
+
"detections": "sample_000002/detections.json",
|
| 7 |
+
"references": "sample_000002/references.json",
|
| 8 |
+
"n_references": 15,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000002",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000003.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000003",
|
| 3 |
+
"plan_path": "sample_000003/plan.json",
|
| 4 |
+
"task_path": "sample_000003/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000003/main_image.png",
|
| 6 |
+
"detections": "sample_000003/detections.json",
|
| 7 |
+
"references": "sample_000003/references.json",
|
| 8 |
+
"n_references": 3,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000003",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000004.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000004",
|
| 3 |
+
"plan_path": "sample_000004/plan.json",
|
| 4 |
+
"task_path": "sample_000004/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000004/main_image.png",
|
| 6 |
+
"detections": "sample_000004/detections.json",
|
| 7 |
+
"references": "sample_000004/references.json",
|
| 8 |
+
"n_references": 5,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000004",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000005.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000005",
|
| 3 |
+
"plan_path": "sample_000005/plan.json",
|
| 4 |
+
"task_path": "sample_000005/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000005/main_image.png",
|
| 6 |
+
"detections": "sample_000005/detections.json",
|
| 7 |
+
"references": "sample_000005/references.json",
|
| 8 |
+
"n_references": 6,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000005",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000006.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000006",
|
| 3 |
+
"plan_path": "sample_000006/plan.json",
|
| 4 |
+
"task_path": "sample_000006/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000006/main_image.png",
|
| 6 |
+
"detections": "sample_000006/detections.json",
|
| 7 |
+
"references": "sample_000006/references.json",
|
| 8 |
+
"n_references": 8,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000006",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000007.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000007",
|
| 3 |
+
"plan_path": "sample_000007/plan.json",
|
| 4 |
+
"task_path": "sample_000007/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000007/main_image.png",
|
| 6 |
+
"detections": "sample_000007/detections.json",
|
| 7 |
+
"references": "sample_000007/references.json",
|
| 8 |
+
"n_references": 14,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000007",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000008.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000008",
|
| 3 |
+
"plan_path": "sample_000008/plan.json",
|
| 4 |
+
"task_path": "sample_000008/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000008/main_image.png",
|
| 6 |
+
"detections": "sample_000008/detections.json",
|
| 7 |
+
"references": "sample_000008/references.json",
|
| 8 |
+
"n_references": 10,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000008",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000009.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000009",
|
| 3 |
+
"plan_path": "sample_000009/plan.json",
|
| 4 |
+
"task_path": "sample_000009/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000009/main_image.png",
|
| 6 |
+
"detections": "sample_000009/detections.json",
|
| 7 |
+
"references": "sample_000009/references.json",
|
| 8 |
+
"n_references": 5,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000009",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000010.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000010",
|
| 3 |
+
"plan_path": "sample_000010/plan.json",
|
| 4 |
+
"task_path": "sample_000010/vocab_task.json",
|
| 5 |
+
"main_image": "sample_000010/main_image.png",
|
| 6 |
+
"detections": "sample_000010/detections.json",
|
| 7 |
+
"references": "sample_000010/references.json",
|
| 8 |
+
"n_references": 9,
|
| 9 |
+
"reference_errors": {},
|
| 10 |
+
"model_ids": {
|
| 11 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 12 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 13 |
+
},
|
| 14 |
+
"item_id": "sample_000010",
|
| 15 |
+
"pool": "reference_pool",
|
| 16 |
+
"retry_count": 0,
|
| 17 |
+
"errors": []
|
| 18 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000001.json
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000001",
|
| 3 |
+
"target_total": 3,
|
| 4 |
+
"target_people": 1,
|
| 5 |
+
"target_objects": 2,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1248,
|
| 8 |
+
832
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "3:2",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 3,
|
| 17 |
+
"n_detected": 3,
|
| 18 |
+
"n_subjects": 3,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "pedestrian",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "CrowdHuman:data/data_69/273278,12fc4700013112375.jpg:person:3",
|
| 26 |
+
"source_name": "pedestrian",
|
| 27 |
+
"source_description": "A person wearing a dark coat and trousers. Source dataset: CrowdHuman. Scene context: A bustling city street lined with trees showcasing vibrant yellow autumn foliage, with many pedestrians walking in both directions.",
|
| 28 |
+
"sub_caption": "pedestrian: A person wearing a dark coat and trousers, walking confidently.. Scene role: Walking along the curbside near the barrier.",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.1528,
|
| 31 |
+
0.301,
|
| 32 |
+
0.2511,
|
| 33 |
+
0.7071
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 0.98,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_pedestrian.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_pedestrian_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_pedestrian.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_pedestrian_attempt_01.png",
|
| 44 |
+
"output": "references/ref_pedestrian.png",
|
| 45 |
+
"mask": "references/sam_mask_pedestrian.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
330.0,
|
| 57 |
+
42.0,
|
| 58 |
+
698.0,
|
| 59 |
+
1007.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.413244,
|
| 62 |
+
"mask_area_ratio": 0.159381,
|
| 63 |
+
"elapsed_seconds": 33.2771
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "parked_dark_car",
|
| 68 |
+
"is_person": false,
|
| 69 |
+
"subject_type": "object",
|
| 70 |
+
"source_set": "obj_set",
|
| 71 |
+
"source_image_id": "BDD100K:bcb356f6-520dd65c:object:9",
|
| 72 |
+
"source_name": "parked dark car",
|
| 73 |
+
"source_description": "A dark-colored sedan parked on the right side of the street, behind the silver car. Source dataset: BDD100K. Scene context: A large white New York City bus is driving down a city street on an overcast day with other cars parked and driving around it.",
|
| 74 |
+
"sub_caption": "parked dark car: A dark-colored sedan.. Scene role: Parked on the street near the curb in the background right.",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.5163,
|
| 77 |
+
0.3897,
|
| 78 |
+
0.9968,
|
| 79 |
+
0.9244
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.98,
|
| 82 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 83 |
+
"ref_image": "references/ref_parked_dark_car.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_parked_dark_car_attempt_01.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_parked_dark_car.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 1,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_parked_dark_car_attempt_01.png",
|
| 90 |
+
"output": "references/ref_parked_dark_car.png",
|
| 91 |
+
"mask": "references/sam_mask_parked_dark_car.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
47.0,
|
| 103 |
+
315.0,
|
| 104 |
+
976.0,
|
| 105 |
+
694.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.4345,
|
| 108 |
+
"mask_area_ratio": 0.180014,
|
| 109 |
+
"elapsed_seconds": 7.1991
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "metal_barrier",
|
| 114 |
+
"is_person": false,
|
| 115 |
+
"subject_type": "object",
|
| 116 |
+
"source_set": "obj_set",
|
| 117 |
+
"source_image_id": "CrowdHuman:data/data_74/284193,1da20000b642be5b.jpg:object:5",
|
| 118 |
+
"source_name": "metal barrier",
|
| 119 |
+
"source_description": "silver metal barricade placed near the entrance Source dataset: CrowdHuman. Scene context: People are gathered outside the entrance of a stone building with arched doorways and large windows.",
|
| 120 |
+
"sub_caption": "metal barrier: A silver metal barricade placed along the street.. Scene role: Positioned along the curb to section off the pedestrian area from the road.",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.3454,
|
| 123 |
+
0.4302,
|
| 124 |
+
0.5465,
|
| 125 |
+
0.8402
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.98,
|
| 128 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 129 |
+
"ref_image": "references/ref_metal_barrier.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_metal_barrier_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_metal_barrier.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_metal_barrier_attempt_01.png",
|
| 136 |
+
"output": "references/ref_metal_barrier.png",
|
| 137 |
+
"mask": "references/sam_mask_metal_barrier.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
2.0,
|
| 149 |
+
107.0,
|
| 150 |
+
1009.0,
|
| 151 |
+
986.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 1.555076,
|
| 154 |
+
"mask_area_ratio": 0.845579,
|
| 155 |
+
"elapsed_seconds": 7.2854
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
],
|
| 159 |
+
"not_emitted": [],
|
| 160 |
+
"model_ids": {
|
| 161 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 162 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 163 |
+
}
|
| 164 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000002.json
ADDED
|
@@ -0,0 +1,716 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000002",
|
| 3 |
+
"target_total": 15,
|
| 4 |
+
"target_people": 2,
|
| 5 |
+
"target_objects": 13,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1152,
|
| 8 |
+
864
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "4:3",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 15,
|
| 17 |
+
"n_detected": 15,
|
| 18 |
+
"n_subjects": 15,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "pedestrian_walking",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "BDD100K:c5d864fa-b0b2380b:person:2",
|
| 26 |
+
"source_name": "pedestrian",
|
| 27 |
+
"source_description": "Person walking away from the camera on the right sidewalk, wearing dark clothing. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a wet city street lined with tall buildings, with pedestrians on the sidewalk and several cars and taxis ahead in the rain.",
|
| 28 |
+
"sub_caption": "pedestrian: Person walking away from the camera on the right sidewalk, wearing dark clothing.. Scene role: walking along the sidewalk on the right side of the street",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.7497,
|
| 31 |
+
0.4757,
|
| 32 |
+
0.7954,
|
| 33 |
+
0.6192
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 100,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_pedestrian_walking.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_pedestrian_walking_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_pedestrian_walking.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_pedestrian_walking_attempt_01.png",
|
| 44 |
+
"output": "references/ref_pedestrian_walking.png",
|
| 45 |
+
"mask": "references/sam_mask_pedestrian_walking.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
341.0,
|
| 57 |
+
63.0,
|
| 58 |
+
695.0,
|
| 59 |
+
972.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.459152,
|
| 62 |
+
"mask_area_ratio": 0.145545,
|
| 63 |
+
"elapsed_seconds": 8.3331
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "shop_pedestrian",
|
| 68 |
+
"is_person": true,
|
| 69 |
+
"subject_type": "person",
|
| 70 |
+
"source_set": "people_set",
|
| 71 |
+
"source_image_id": "BDD100K:b714a088-861a043b:person:2",
|
| 72 |
+
"source_name": "pedestrian",
|
| 73 |
+
"source_description": "another person near the shop entrance on the left, partially obscured Source dataset: BDD100K. Scene context: a city street scene from the perspective of a vehicle, showing multiple cars in motion, buildings lining the road, and pedestrians on the sidewalks during dusk or early evening",
|
| 74 |
+
"sub_caption": "pedestrian: Person standing near a shop entrance on the right, partially obscured.. Scene role: standing on the sidewalk near the storefronts on the right",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.9337,
|
| 77 |
+
0.4752,
|
| 78 |
+
0.9695,
|
| 79 |
+
0.6107
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.98,
|
| 82 |
+
"ref_style": "white_bg_full_body_front",
|
| 83 |
+
"ref_image": "references/ref_shop_pedestrian.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_shop_pedestrian_attempt_02.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_shop_pedestrian.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 2,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_shop_pedestrian_attempt_02.png",
|
| 90 |
+
"output": "references/ref_shop_pedestrian.png",
|
| 91 |
+
"mask": "references/sam_mask_shop_pedestrian.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
312.0,
|
| 103 |
+
43.0,
|
| 104 |
+
719.0,
|
| 105 |
+
1020.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.162079,
|
| 108 |
+
"mask_area_ratio": 0.167512,
|
| 109 |
+
"elapsed_seconds": 7.2283
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "city_buildings",
|
| 114 |
+
"is_person": false,
|
| 115 |
+
"subject_type": "object",
|
| 116 |
+
"source_set": "obj_set",
|
| 117 |
+
"source_image_id": "CrowdHuman:data/data_9/283554,31eeb000e9237b31.jpg:object:9",
|
| 118 |
+
"source_name": "building",
|
| 119 |
+
"source_description": "Various city buildings of different heights forming the skyline in the background. Source dataset: CrowdHuman. Scene context: People are walking and resting on the wooden walkway of a large suspension bridge with a city skyline in the background.",
|
| 120 |
+
"sub_caption": "building: Various city buildings of different heights forming the skyline and lining the street.. Scene role: framing the street and forming the background skyline",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.0,
|
| 123 |
+
0.0,
|
| 124 |
+
1.0,
|
| 125 |
+
0.6084
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.9,
|
| 128 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 129 |
+
"ref_image": "references/ref_city_buildings.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_city_buildings_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_city_buildings.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_city_buildings_attempt_01.png",
|
| 136 |
+
"output": "references/ref_city_buildings.png",
|
| 137 |
+
"mask": "references/sam_mask_city_buildings.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
14.0,
|
| 149 |
+
171.0,
|
| 150 |
+
1009.0,
|
| 151 |
+
883.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 3.176814,
|
| 154 |
+
"mask_area_ratio": 0.327415,
|
| 155 |
+
"elapsed_seconds": 7.146
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"name": "street_signs",
|
| 160 |
+
"is_person": false,
|
| 161 |
+
"subject_type": "object",
|
| 162 |
+
"source_set": "obj_set",
|
| 163 |
+
"source_image_id": "BDD100K:b5ab0e46-8eab4733:object:8",
|
| 164 |
+
"source_name": "street signs",
|
| 165 |
+
"source_description": "Various street signs attached to a pole on the right side of the street, including a speed limit sign. Source dataset: BDD100K. Scene context: A narrow city street with several parked and moving box trucks and vans, bordered by multi-story buildings and construction barriers.",
|
| 166 |
+
"sub_caption": "street signs: Various blank street signs attached to a pole on the right side of the street.. Scene role: mounted on a pole next to the right sidewalk",
|
| 167 |
+
"measured_bbox": [
|
| 168 |
+
0.641,
|
| 169 |
+
0.165,
|
| 170 |
+
0.744,
|
| 171 |
+
0.408
|
| 172 |
+
],
|
| 173 |
+
"detection_confidence": 0.9,
|
| 174 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 175 |
+
"ref_image": "references/ref_street_signs.png",
|
| 176 |
+
"raw_ref_image": "references/raw_ref_street_signs_attempt_01.png",
|
| 177 |
+
"reference_verify": "references/reference_verify_street_signs.json",
|
| 178 |
+
"reference_verify_passed": true,
|
| 179 |
+
"reference_attempts": 1,
|
| 180 |
+
"sam_white_bg": {
|
| 181 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_signs_attempt_01.png",
|
| 182 |
+
"output": "references/ref_street_signs.png",
|
| 183 |
+
"mask": "references/sam_mask_street_signs.png",
|
| 184 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 185 |
+
"sam_model_type": "vit_b",
|
| 186 |
+
"sam_device": "auto",
|
| 187 |
+
"sam_working_size": [
|
| 188 |
+
640,
|
| 189 |
+
640
|
| 190 |
+
],
|
| 191 |
+
"sam_max_side": 640,
|
| 192 |
+
"sam_downscale": 0.625,
|
| 193 |
+
"prompt_box_xyxy": [
|
| 194 |
+
224.0,
|
| 195 |
+
0.0,
|
| 196 |
+
744.0,
|
| 197 |
+
1023.0
|
| 198 |
+
],
|
| 199 |
+
"mask_score": 3.332549,
|
| 200 |
+
"mask_area_ratio": 0.190769,
|
| 201 |
+
"elapsed_seconds": 7.1886
|
| 202 |
+
}
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"name": "storefront_sign",
|
| 206 |
+
"is_person": false,
|
| 207 |
+
"subject_type": "object",
|
| 208 |
+
"source_set": "obj_set",
|
| 209 |
+
"source_image_id": "CrowdHuman:data/data_4/273275,46a6f0005d04fc24.jpg:object:7",
|
| 210 |
+
"source_name": "storefront sign",
|
| 211 |
+
"source_description": "A dark sign with white text 'DELI' and a smaller red 'ATM' sign beneath it on the left. Source dataset: CrowdHuman. Scene context: A densely crowded city street with numerous pedestrians walking towards the camera, while emergency vehicles with flashing lights are visible in the background traffic.",
|
| 212 |
+
"sub_caption": "storefront sign: A dark hanging sign framework attached to a building on the right, devoid of readable text.. Scene role: hanging above the shop entrance on the right side of the road",
|
| 213 |
+
"measured_bbox": [
|
| 214 |
+
0.7854,
|
| 215 |
+
0.1934,
|
| 216 |
+
0.9082,
|
| 217 |
+
0.2906
|
| 218 |
+
],
|
| 219 |
+
"detection_confidence": 0.99,
|
| 220 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 221 |
+
"ref_image": "references/ref_storefront_sign.png",
|
| 222 |
+
"raw_ref_image": "references/raw_ref_storefront_sign_attempt_01.png",
|
| 223 |
+
"reference_verify": "references/reference_verify_storefront_sign.json",
|
| 224 |
+
"reference_verify_passed": true,
|
| 225 |
+
"reference_attempts": 1,
|
| 226 |
+
"sam_white_bg": {
|
| 227 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_storefront_sign_attempt_01.png",
|
| 228 |
+
"output": "references/ref_storefront_sign.png",
|
| 229 |
+
"mask": "references/sam_mask_storefront_sign.png",
|
| 230 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 231 |
+
"sam_model_type": "vit_b",
|
| 232 |
+
"sam_device": "auto",
|
| 233 |
+
"sam_working_size": [
|
| 234 |
+
640,
|
| 235 |
+
640
|
| 236 |
+
],
|
| 237 |
+
"sam_max_side": 640,
|
| 238 |
+
"sam_downscale": 0.625,
|
| 239 |
+
"prompt_box_xyxy": [
|
| 240 |
+
46.0,
|
| 241 |
+
0.0,
|
| 242 |
+
1023.0,
|
| 243 |
+
811.0
|
| 244 |
+
],
|
| 245 |
+
"mask_score": 3.296373,
|
| 246 |
+
"mask_area_ratio": 0.447847,
|
| 247 |
+
"elapsed_seconds": 7.3102
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"name": "parked_suv_right",
|
| 252 |
+
"is_person": false,
|
| 253 |
+
"subject_type": "object",
|
| 254 |
+
"source_set": "obj_set",
|
| 255 |
+
"source_image_id": "BDD100K:c54441e6-400c221e:object:4",
|
| 256 |
+
"source_name": "parked SUV",
|
| 257 |
+
"source_description": "Dark-colored SUV parked ahead of the sedan on the right side of the road. Source dataset: BDD100K. Scene context: Nighttime driving scene approaching an intersection with green traffic lights and parked cars on the right.",
|
| 258 |
+
"sub_caption": "parked SUV: Dark-colored SUV parked on the right side of the road.. Scene role: parked alongside the right curb",
|
| 259 |
+
"measured_bbox": [
|
| 260 |
+
0.5507,
|
| 261 |
+
0.4879,
|
| 262 |
+
0.6783,
|
| 263 |
+
0.6234
|
| 264 |
+
],
|
| 265 |
+
"detection_confidence": 0.95,
|
| 266 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 267 |
+
"ref_image": "references/ref_parked_suv_right.png",
|
| 268 |
+
"raw_ref_image": "references/raw_ref_parked_suv_right_attempt_01.png",
|
| 269 |
+
"reference_verify": "references/reference_verify_parked_suv_right.json",
|
| 270 |
+
"reference_verify_passed": true,
|
| 271 |
+
"reference_attempts": 1,
|
| 272 |
+
"sam_white_bg": {
|
| 273 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_parked_suv_right_attempt_01.png",
|
| 274 |
+
"output": "references/ref_parked_suv_right.png",
|
| 275 |
+
"mask": "references/sam_mask_parked_suv_right.png",
|
| 276 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 277 |
+
"sam_model_type": "vit_b",
|
| 278 |
+
"sam_device": "auto",
|
| 279 |
+
"sam_working_size": [
|
| 280 |
+
640,
|
| 281 |
+
640
|
| 282 |
+
],
|
| 283 |
+
"sam_max_side": 640,
|
| 284 |
+
"sam_downscale": 0.625,
|
| 285 |
+
"prompt_box_xyxy": [
|
| 286 |
+
156.0,
|
| 287 |
+
150.0,
|
| 288 |
+
868.0,
|
| 289 |
+
812.0
|
| 290 |
+
],
|
| 291 |
+
"mask_score": 3.463227,
|
| 292 |
+
"mask_area_ratio": 0.291222,
|
| 293 |
+
"elapsed_seconds": 7.2583
|
| 294 |
+
}
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"name": "parked_car_left",
|
| 298 |
+
"is_person": false,
|
| 299 |
+
"subject_type": "object",
|
| 300 |
+
"source_set": "obj_set",
|
| 301 |
+
"source_image_id": "BDD100K:bb2e43e4-5e7a7129:object:6",
|
| 302 |
+
"source_name": "car",
|
| 303 |
+
"source_description": "A dark car parked along the left curb further ahead. Source dataset: BDD100K. Scene context: A dashcam view from a vehicle driving down a multi-lane road on a partly cloudy day, with a few other cars and pedestrians on the sidewalk.",
|
| 304 |
+
"sub_caption": "car: A dark car parked along the left curb further ahead.. Scene role: parked alongside the left curb",
|
| 305 |
+
"measured_bbox": [
|
| 306 |
+
0.0,
|
| 307 |
+
0.5102,
|
| 308 |
+
0.1259,
|
| 309 |
+
0.5998
|
| 310 |
+
],
|
| 311 |
+
"detection_confidence": 0.9,
|
| 312 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 313 |
+
"ref_image": "references/ref_parked_car_left.png",
|
| 314 |
+
"raw_ref_image": "references/raw_ref_parked_car_left_attempt_01.png",
|
| 315 |
+
"reference_verify": "references/reference_verify_parked_car_left.json",
|
| 316 |
+
"reference_verify_passed": true,
|
| 317 |
+
"reference_attempts": 1,
|
| 318 |
+
"sam_white_bg": {
|
| 319 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_parked_car_left_attempt_01.png",
|
| 320 |
+
"output": "references/ref_parked_car_left.png",
|
| 321 |
+
"mask": "references/sam_mask_parked_car_left.png",
|
| 322 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 323 |
+
"sam_model_type": "vit_b",
|
| 324 |
+
"sam_device": "auto",
|
| 325 |
+
"sam_working_size": [
|
| 326 |
+
640,
|
| 327 |
+
640
|
| 328 |
+
],
|
| 329 |
+
"sam_max_side": 640,
|
| 330 |
+
"sam_downscale": 0.625,
|
| 331 |
+
"prompt_box_xyxy": [
|
| 332 |
+
0.0,
|
| 333 |
+
319.0,
|
| 334 |
+
1023.0,
|
| 335 |
+
695.0
|
| 336 |
+
],
|
| 337 |
+
"mask_score": 3.122119,
|
| 338 |
+
"mask_area_ratio": 0.19451,
|
| 339 |
+
"elapsed_seconds": 8.5738
|
| 340 |
+
}
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"name": "traveling_dark_suv",
|
| 344 |
+
"is_person": false,
|
| 345 |
+
"subject_type": "object",
|
| 346 |
+
"source_set": "obj_set",
|
| 347 |
+
"source_image_id": "BDD100K:c889c950-865ca5b6:object:0",
|
| 348 |
+
"source_name": "dark SUV",
|
| 349 |
+
"source_description": "A dark SUV traveling in the left lane, with visible red taillights. Source dataset: BDD100K. Scene context: Nighttime driving on a highway with several cars visible ahead, illuminated mainly by taillights and headlights.",
|
| 350 |
+
"sub_caption": "dark SUV: A dark SUV traveling in the left lane, with visible red taillights reflecting the twilight.. Scene role: driving in the adjacent lane",
|
| 351 |
+
"measured_bbox": [
|
| 352 |
+
0.2594,
|
| 353 |
+
0.4853,
|
| 354 |
+
0.417,
|
| 355 |
+
0.6419
|
| 356 |
+
],
|
| 357 |
+
"detection_confidence": 0.99,
|
| 358 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 359 |
+
"ref_image": "references/ref_traveling_dark_suv.png",
|
| 360 |
+
"raw_ref_image": "references/raw_ref_traveling_dark_suv_attempt_01.png",
|
| 361 |
+
"reference_verify": "references/reference_verify_traveling_dark_suv.json",
|
| 362 |
+
"reference_verify_passed": true,
|
| 363 |
+
"reference_attempts": 1,
|
| 364 |
+
"sam_white_bg": {
|
| 365 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_traveling_dark_suv_attempt_01.png",
|
| 366 |
+
"output": "references/ref_traveling_dark_suv.png",
|
| 367 |
+
"mask": "references/sam_mask_traveling_dark_suv.png",
|
| 368 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 369 |
+
"sam_model_type": "vit_b",
|
| 370 |
+
"sam_device": "auto",
|
| 371 |
+
"sam_working_size": [
|
| 372 |
+
640,
|
| 373 |
+
640
|
| 374 |
+
],
|
| 375 |
+
"sam_max_side": 640,
|
| 376 |
+
"sam_downscale": 0.625,
|
| 377 |
+
"prompt_box_xyxy": [
|
| 378 |
+
119.0,
|
| 379 |
+
198.0,
|
| 380 |
+
910.0,
|
| 381 |
+
810.0
|
| 382 |
+
],
|
| 383 |
+
"mask_score": 3.470329,
|
| 384 |
+
"mask_area_ratio": 0.300606,
|
| 385 |
+
"elapsed_seconds": 8.5072
|
| 386 |
+
}
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"name": "street_light",
|
| 390 |
+
"is_person": false,
|
| 391 |
+
"subject_type": "object",
|
| 392 |
+
"source_set": "obj_set",
|
| 393 |
+
"source_image_id": "BDD100K:c84f848e-2a5e0737:object:2",
|
| 394 |
+
"source_name": "street light",
|
| 395 |
+
"source_description": "Tall pole with a bright light on top, illuminating the road from the right side. Source dataset: BDD100K. Scene context: Nighttime driving view on a multi-lane highway with streetlights illuminating the dark road ahead.",
|
| 396 |
+
"sub_caption": "street light: Tall pole with a bright light on top, illuminating the road from the right side.. Scene role: providing illumination from the right sidewalk",
|
| 397 |
+
"measured_bbox": [
|
| 398 |
+
0.5577,
|
| 399 |
+
0.0219,
|
| 400 |
+
0.6964,
|
| 401 |
+
0.588
|
| 402 |
+
],
|
| 403 |
+
"detection_confidence": 1.0,
|
| 404 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 405 |
+
"ref_image": "references/ref_street_light.png",
|
| 406 |
+
"raw_ref_image": "references/raw_ref_street_light_attempt_01.png",
|
| 407 |
+
"reference_verify": "references/reference_verify_street_light.json",
|
| 408 |
+
"reference_verify_passed": true,
|
| 409 |
+
"reference_attempts": 1,
|
| 410 |
+
"sam_white_bg": {
|
| 411 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_light_attempt_01.png",
|
| 412 |
+
"output": "references/ref_street_light.png",
|
| 413 |
+
"mask": "references/sam_mask_street_light.png",
|
| 414 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 415 |
+
"sam_model_type": "vit_b",
|
| 416 |
+
"sam_device": "auto",
|
| 417 |
+
"sam_working_size": [
|
| 418 |
+
640,
|
| 419 |
+
640
|
| 420 |
+
],
|
| 421 |
+
"sam_max_side": 640,
|
| 422 |
+
"sam_downscale": 0.625,
|
| 423 |
+
"prompt_box_xyxy": [
|
| 424 |
+
330.0,
|
| 425 |
+
17.0,
|
| 426 |
+
688.0,
|
| 427 |
+
996.0
|
| 428 |
+
],
|
| 429 |
+
"mask_score": 3.395182,
|
| 430 |
+
"mask_area_ratio": 0.033435,
|
| 431 |
+
"elapsed_seconds": 7.0701
|
| 432 |
+
}
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"name": "vehicle_dashboard",
|
| 436 |
+
"is_person": false,
|
| 437 |
+
"subject_type": "object",
|
| 438 |
+
"source_set": "obj_set",
|
| 439 |
+
"source_image_id": "BDD100K:b5032e1d-dad95b60:object:9",
|
| 440 |
+
"source_name": "dashboard",
|
| 441 |
+
"source_description": "The dark, reflective dashboard and lower windshield area of the camera vehicle in the foreground. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a residential city street lined with trees, parked cars, and multi-story brick buildings.",
|
| 442 |
+
"sub_caption": "dashboard: The dark, reflective dashboard and lower windshield area of the camera vehicle in the foreground.. Scene role: anchoring the bottom of the frame to establish a driver's perspective",
|
| 443 |
+
"measured_bbox": [
|
| 444 |
+
0.0,
|
| 445 |
+
0.8881,
|
| 446 |
+
1.0,
|
| 447 |
+
1.0
|
| 448 |
+
],
|
| 449 |
+
"detection_confidence": 0.9,
|
| 450 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 451 |
+
"ref_image": "references/ref_vehicle_dashboard.png",
|
| 452 |
+
"raw_ref_image": "references/raw_ref_vehicle_dashboard_attempt_01.png",
|
| 453 |
+
"reference_verify": "references/reference_verify_vehicle_dashboard.json",
|
| 454 |
+
"reference_verify_passed": true,
|
| 455 |
+
"reference_attempts": 1,
|
| 456 |
+
"sam_white_bg": {
|
| 457 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_vehicle_dashboard_attempt_01.png",
|
| 458 |
+
"output": "references/ref_vehicle_dashboard.png",
|
| 459 |
+
"mask": "references/sam_mask_vehicle_dashboard.png",
|
| 460 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 461 |
+
"sam_model_type": "vit_b",
|
| 462 |
+
"sam_device": "auto",
|
| 463 |
+
"sam_working_size": [
|
| 464 |
+
640,
|
| 465 |
+
640
|
| 466 |
+
],
|
| 467 |
+
"sam_max_side": 640,
|
| 468 |
+
"sam_downscale": 0.625,
|
| 469 |
+
"prompt_box_xyxy": [
|
| 470 |
+
0.0,
|
| 471 |
+
223.0,
|
| 472 |
+
1023.0,
|
| 473 |
+
700.0
|
| 474 |
+
],
|
| 475 |
+
"mask_score": 2.938032,
|
| 476 |
+
"mask_area_ratio": 0.282133,
|
| 477 |
+
"elapsed_seconds": 7.1679
|
| 478 |
+
}
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"name": "drainage_grate",
|
| 482 |
+
"is_person": false,
|
| 483 |
+
"subject_type": "object",
|
| 484 |
+
"source_set": "obj_set",
|
| 485 |
+
"source_image_id": "BDD100K:b4d0e72d-3b208072:object:16",
|
| 486 |
+
"source_name": "drainage grate",
|
| 487 |
+
"source_description": "A metal drainage grate on the edge of the road on the right. Source dataset: BDD100K. Scene context: A driving scene on a multi-lane highway with a dark red minivan in the left lane, under a partly cloudy sky.",
|
| 488 |
+
"sub_caption": "drainage grate: A metal drainage grate on the edge of the road on the right.. Scene role: embedded in the road surface near the right curb",
|
| 489 |
+
"measured_bbox": [
|
| 490 |
+
0.5682,
|
| 491 |
+
0.6773,
|
| 492 |
+
0.8089,
|
| 493 |
+
0.73
|
| 494 |
+
],
|
| 495 |
+
"detection_confidence": 0.99,
|
| 496 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 497 |
+
"ref_image": "references/ref_drainage_grate.png",
|
| 498 |
+
"raw_ref_image": "references/raw_ref_drainage_grate_attempt_01.png",
|
| 499 |
+
"reference_verify": "references/reference_verify_drainage_grate.json",
|
| 500 |
+
"reference_verify_passed": true,
|
| 501 |
+
"reference_attempts": 1,
|
| 502 |
+
"sam_white_bg": {
|
| 503 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_drainage_grate_attempt_01.png",
|
| 504 |
+
"output": "references/ref_drainage_grate.png",
|
| 505 |
+
"mask": "references/sam_mask_drainage_grate.png",
|
| 506 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 507 |
+
"sam_model_type": "vit_b",
|
| 508 |
+
"sam_device": "auto",
|
| 509 |
+
"sam_working_size": [
|
| 510 |
+
640,
|
| 511 |
+
640
|
| 512 |
+
],
|
| 513 |
+
"sam_max_side": 640,
|
| 514 |
+
"sam_downscale": 0.625,
|
| 515 |
+
"prompt_box_xyxy": [
|
| 516 |
+
0.0,
|
| 517 |
+
250.0,
|
| 518 |
+
1023.0,
|
| 519 |
+
773.0
|
| 520 |
+
],
|
| 521 |
+
"mask_score": 3.366042,
|
| 522 |
+
"mask_area_ratio": 0.379179,
|
| 523 |
+
"elapsed_seconds": 8.3171
|
| 524 |
+
}
|
| 525 |
+
},
|
| 526 |
+
{
|
| 527 |
+
"name": "white_car",
|
| 528 |
+
"is_person": false,
|
| 529 |
+
"subject_type": "object",
|
| 530 |
+
"source_set": "obj_set",
|
| 531 |
+
"source_image_id": "BDD100K:b3a102ed-6ef54f5e:object:3",
|
| 532 |
+
"source_name": "white car",
|
| 533 |
+
"source_description": "A white car visible further down the road in the right lane. Source dataset: BDD100K. Scene context: Nighttime driving scene in a city with cars stopped in traffic and a construction site on the right.",
|
| 534 |
+
"sub_caption": "white car: A white car visible further down the road in the right lane.. Scene role: driving ahead in the same lane",
|
| 535 |
+
"measured_bbox": [
|
| 536 |
+
0.4356,
|
| 537 |
+
0.5036,
|
| 538 |
+
0.4784,
|
| 539 |
+
0.548
|
| 540 |
+
],
|
| 541 |
+
"detection_confidence": "high",
|
| 542 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 543 |
+
"ref_image": "references/ref_white_car.png",
|
| 544 |
+
"raw_ref_image": "references/raw_ref_white_car_attempt_01.png",
|
| 545 |
+
"reference_verify": "references/reference_verify_white_car.json",
|
| 546 |
+
"reference_verify_passed": true,
|
| 547 |
+
"reference_attempts": 1,
|
| 548 |
+
"sam_white_bg": {
|
| 549 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_white_car_attempt_01.png",
|
| 550 |
+
"output": "references/ref_white_car.png",
|
| 551 |
+
"mask": "references/sam_mask_white_car.png",
|
| 552 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 553 |
+
"sam_model_type": "vit_b",
|
| 554 |
+
"sam_device": "auto",
|
| 555 |
+
"sam_working_size": [
|
| 556 |
+
640,
|
| 557 |
+
640
|
| 558 |
+
],
|
| 559 |
+
"sam_max_side": 640,
|
| 560 |
+
"sam_downscale": 0.625,
|
| 561 |
+
"prompt_box_xyxy": [
|
| 562 |
+
137.0,
|
| 563 |
+
215.0,
|
| 564 |
+
884.0,
|
| 565 |
+
819.0
|
| 566 |
+
],
|
| 567 |
+
"mask_score": 3.442096,
|
| 568 |
+
"mask_area_ratio": 0.295652,
|
| 569 |
+
"elapsed_seconds": 7.1564
|
| 570 |
+
}
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"name": "yellow_lines",
|
| 574 |
+
"is_person": false,
|
| 575 |
+
"subject_type": "object",
|
| 576 |
+
"source_set": "obj_set",
|
| 577 |
+
"source_image_id": "BDD100K:c417a291-7802692d:object:8",
|
| 578 |
+
"source_name": "yellow lines",
|
| 579 |
+
"source_description": "Double yellow painted lines separating opposite directions of traffic. Source dataset: BDD100K. Scene context: A pedestrian crossing a street at a crosswalk with several cars stopped around them, with storefronts visible in the background.",
|
| 580 |
+
"sub_caption": "yellow lines: Double yellow painted lines separating opposite directions of traffic.. Scene role: painted down the center of the road",
|
| 581 |
+
"measured_bbox": [
|
| 582 |
+
0.0,
|
| 583 |
+
0.622,
|
| 584 |
+
0.2642,
|
| 585 |
+
0.7692
|
| 586 |
+
],
|
| 587 |
+
"detection_confidence": 0.99,
|
| 588 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 589 |
+
"ref_image": "references/ref_yellow_lines.png",
|
| 590 |
+
"raw_ref_image": "references/raw_ref_yellow_lines_attempt_01.png",
|
| 591 |
+
"reference_verify": "references/reference_verify_yellow_lines.json",
|
| 592 |
+
"reference_verify_passed": true,
|
| 593 |
+
"reference_attempts": 1,
|
| 594 |
+
"sam_white_bg": {
|
| 595 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_yellow_lines_attempt_01.png",
|
| 596 |
+
"output": "references/ref_yellow_lines.png",
|
| 597 |
+
"mask": "references/sam_mask_yellow_lines.png",
|
| 598 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 599 |
+
"sam_model_type": "vit_b",
|
| 600 |
+
"sam_device": "auto",
|
| 601 |
+
"sam_working_size": [
|
| 602 |
+
640,
|
| 603 |
+
640
|
| 604 |
+
],
|
| 605 |
+
"sam_max_side": 640,
|
| 606 |
+
"sam_downscale": 0.625,
|
| 607 |
+
"prompt_box_xyxy": [
|
| 608 |
+
0.0,
|
| 609 |
+
0.0,
|
| 610 |
+
1023.0,
|
| 611 |
+
1023.0
|
| 612 |
+
],
|
| 613 |
+
"mask_score": 3.166027,
|
| 614 |
+
"mask_area_ratio": 0.242679,
|
| 615 |
+
"elapsed_seconds": 7.0941
|
| 616 |
+
}
|
| 617 |
+
},
|
| 618 |
+
{
|
| 619 |
+
"name": "street_trees",
|
| 620 |
+
"is_person": false,
|
| 621 |
+
"subject_type": "object",
|
| 622 |
+
"source_set": "obj_set",
|
| 623 |
+
"source_image_id": "BDD100K:c4891df0-24371ae1:object:3",
|
| 624 |
+
"source_name": "trees",
|
| 625 |
+
"source_description": "Numerous trees with dense green and yellowish foliage lining both sides of the road. Source dataset: BDD100K. Scene context: A dark SUV drives ahead on a wet road lined with trees and a fence under a cloudy sky.",
|
| 626 |
+
"sub_caption": "trees: Numerous trees with dense green foliage lining both sides of the road.. Scene role: growing along the sidewalks, adding greenery",
|
| 627 |
+
"measured_bbox": [
|
| 628 |
+
0.2664,
|
| 629 |
+
0.0,
|
| 630 |
+
0.7141,
|
| 631 |
+
0.5127
|
| 632 |
+
],
|
| 633 |
+
"detection_confidence": 0.9,
|
| 634 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 635 |
+
"ref_image": "references/ref_street_trees.png",
|
| 636 |
+
"raw_ref_image": "references/raw_ref_street_trees_attempt_01.png",
|
| 637 |
+
"reference_verify": "references/reference_verify_street_trees.json",
|
| 638 |
+
"reference_verify_passed": true,
|
| 639 |
+
"reference_attempts": 1,
|
| 640 |
+
"sam_white_bg": {
|
| 641 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_trees_attempt_01.png",
|
| 642 |
+
"output": "references/ref_street_trees.png",
|
| 643 |
+
"mask": "references/sam_mask_street_trees.png",
|
| 644 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 645 |
+
"sam_model_type": "vit_b",
|
| 646 |
+
"sam_device": "auto",
|
| 647 |
+
"sam_working_size": [
|
| 648 |
+
640,
|
| 649 |
+
640
|
| 650 |
+
],
|
| 651 |
+
"sam_max_side": 640,
|
| 652 |
+
"sam_downscale": 0.625,
|
| 653 |
+
"prompt_box_xyxy": [
|
| 654 |
+
35.0,
|
| 655 |
+
55.0,
|
| 656 |
+
1002.0,
|
| 657 |
+
1000.0
|
| 658 |
+
],
|
| 659 |
+
"mask_score": 3.226043,
|
| 660 |
+
"mask_area_ratio": 0.439437,
|
| 661 |
+
"elapsed_seconds": 7.0986
|
| 662 |
+
}
|
| 663 |
+
},
|
| 664 |
+
{
|
| 665 |
+
"name": "twilight_sky",
|
| 666 |
+
"is_person": false,
|
| 667 |
+
"subject_type": "object",
|
| 668 |
+
"source_set": "obj_set",
|
| 669 |
+
"source_image_id": "BDD100K:c13c0d1f-00dfd075:object:9",
|
| 670 |
+
"source_name": "sky",
|
| 671 |
+
"source_description": "Clear twilight sky transitioning from bright near the horizon to dark blue at the top. Source dataset: BDD100K. Scene context: View from a moving vehicle looking down a multi-lane city street lined with parked cars and large trees under a clear twilight sky.",
|
| 672 |
+
"sub_caption": "sky: Clear twilight sky transitioning from bright near the horizon to dark blue at the top.. Scene role: visible above the buildings and trees at the end of the road",
|
| 673 |
+
"measured_bbox": [
|
| 674 |
+
0.188,
|
| 675 |
+
0.0,
|
| 676 |
+
0.862,
|
| 677 |
+
0.4846
|
| 678 |
+
],
|
| 679 |
+
"detection_confidence": 0.95,
|
| 680 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 681 |
+
"ref_image": "references/ref_twilight_sky.png",
|
| 682 |
+
"raw_ref_image": "references/raw_ref_twilight_sky_attempt_01.png",
|
| 683 |
+
"reference_verify": "references/reference_verify_twilight_sky.json",
|
| 684 |
+
"reference_verify_passed": true,
|
| 685 |
+
"reference_attempts": 1,
|
| 686 |
+
"sam_white_bg": {
|
| 687 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_twilight_sky_attempt_01.png",
|
| 688 |
+
"output": "references/ref_twilight_sky.png",
|
| 689 |
+
"mask": "references/sam_mask_twilight_sky.png",
|
| 690 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 691 |
+
"sam_model_type": "vit_b",
|
| 692 |
+
"sam_device": "auto",
|
| 693 |
+
"sam_working_size": [
|
| 694 |
+
640,
|
| 695 |
+
640
|
| 696 |
+
],
|
| 697 |
+
"sam_max_side": 640,
|
| 698 |
+
"sam_downscale": 0.625,
|
| 699 |
+
"prompt_box_xyxy": [
|
| 700 |
+
72.0,
|
| 701 |
+
72.0,
|
| 702 |
+
951.0,
|
| 703 |
+
951.0
|
| 704 |
+
],
|
| 705 |
+
"mask_score": 3.471577,
|
| 706 |
+
"mask_area_ratio": 0.631801,
|
| 707 |
+
"elapsed_seconds": 7.5016
|
| 708 |
+
}
|
| 709 |
+
}
|
| 710 |
+
],
|
| 711 |
+
"not_emitted": [],
|
| 712 |
+
"model_ids": {
|
| 713 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 714 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 715 |
+
}
|
| 716 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000003.json
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000003",
|
| 3 |
+
"target_total": 3,
|
| 4 |
+
"target_people": 1,
|
| 5 |
+
"target_objects": 2,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1280,
|
| 8 |
+
720
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "16:9",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 3,
|
| 17 |
+
"n_detected": 3,
|
| 18 |
+
"n_subjects": 3,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "waiting_pedestrian",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "CrowdHuman:data/data_12/282555,5c403000efcca35d.jpg:person:13",
|
| 26 |
+
"source_name": "shopper",
|
| 27 |
+
"source_description": "A person standing on the top level, wearing a dark top and dark pants. Source dataset: CrowdHuman. Scene context: A multi-level outdoor shopping mall with various people walking and shopping.",
|
| 28 |
+
"sub_caption": "shopper: A pedestrian wearing a dark top and dark pants, standing upright with a natural posture.. Scene role: Waiting at the curb near the crosswalk on the left side of the street.",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.0928,
|
| 31 |
+
0.1174,
|
| 32 |
+
0.205,
|
| 33 |
+
0.9401
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 0.95,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_waiting_pedestrian.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_waiting_pedestrian_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_waiting_pedestrian.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_waiting_pedestrian_attempt_01.png",
|
| 44 |
+
"output": "references/ref_waiting_pedestrian.png",
|
| 45 |
+
"mask": "references/sam_mask_waiting_pedestrian.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
348.0,
|
| 57 |
+
48.0,
|
| 58 |
+
704.0,
|
| 59 |
+
1015.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.427649,
|
| 62 |
+
"mask_area_ratio": 0.155239,
|
| 63 |
+
"elapsed_seconds": 6.9951
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "black_sedan",
|
| 68 |
+
"is_person": false,
|
| 69 |
+
"subject_type": "object",
|
| 70 |
+
"source_set": "obj_set",
|
| 71 |
+
"source_image_id": "BDD100K:c0c183ff-1b24f541:object:7",
|
| 72 |
+
"source_name": "black sedan",
|
| 73 |
+
"source_description": "A black sedan driving in the right lane ahead of the red hatchback. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a multi-lane city street flanked by tall buildings, with various cars moving in the same direction and parked along the side.",
|
| 74 |
+
"sub_caption": "black sedan: A glossy black sedan with visible headlights and a detailed front grille.. Scene role: Approaching the crosswalk in the center traffic lane.",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.3895,
|
| 77 |
+
0.2431,
|
| 78 |
+
0.591,
|
| 79 |
+
0.5084
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.99,
|
| 82 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 83 |
+
"ref_image": "references/ref_black_sedan.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_black_sedan_attempt_01.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_black_sedan.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 1,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_black_sedan_attempt_01.png",
|
| 90 |
+
"output": "references/ref_black_sedan.png",
|
| 91 |
+
"mask": "references/sam_mask_black_sedan.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
0.0,
|
| 103 |
+
221.0,
|
| 104 |
+
1023.0,
|
| 105 |
+
796.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.446312,
|
| 108 |
+
"mask_area_ratio": 0.340465,
|
| 109 |
+
"elapsed_seconds": 7.2258
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "silver_car",
|
| 114 |
+
"is_person": false,
|
| 115 |
+
"subject_type": "object",
|
| 116 |
+
"source_set": "obj_set",
|
| 117 |
+
"source_image_id": "BDD100K:be49ae7a-1ffaa683:object:2",
|
| 118 |
+
"source_name": "silver car",
|
| 119 |
+
"source_description": "A silver car driving in the adjacent right lane, slightly ahead of the white car. Source dataset: BDD100K. Scene context: View from inside a vehicle driving in heavy traffic on a multi-lane road under a clear sky.",
|
| 120 |
+
"sub_caption": "silver car: A metallic silver car reflecting daylight.. Scene role: Driving in the right lane slightly ahead of the black sedan, approaching the intersection.",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.6628,
|
| 123 |
+
0.2419,
|
| 124 |
+
0.9089,
|
| 125 |
+
0.4999
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.99,
|
| 128 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 129 |
+
"ref_image": "references/ref_silver_car.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_silver_car_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_silver_car.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_silver_car_attempt_01.png",
|
| 136 |
+
"output": "references/ref_silver_car.png",
|
| 137 |
+
"mask": "references/sam_mask_silver_car.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
13.0,
|
| 149 |
+
220.0,
|
| 150 |
+
1011.0,
|
| 151 |
+
811.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 3.077144,
|
| 154 |
+
"mask_area_ratio": 0.338042,
|
| 155 |
+
"elapsed_seconds": 7.0902
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
],
|
| 159 |
+
"not_emitted": [],
|
| 160 |
+
"model_ids": {
|
| 161 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 162 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 163 |
+
}
|
| 164 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000004.json
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000004",
|
| 3 |
+
"target_total": 5,
|
| 4 |
+
"target_people": 1,
|
| 5 |
+
"target_objects": 4,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1248,
|
| 8 |
+
832
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "3:2",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 5,
|
| 17 |
+
"n_detected": 5,
|
| 18 |
+
"n_subjects": 5,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "walker",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "CrowdHuman:data/data_51/273275,145927000354f7525.jpg:person:10",
|
| 26 |
+
"source_name": "walker",
|
| 27 |
+
"source_description": "A man walking, wearing a blue hoodie with 'EMO' written on it, grey pants, and dark shoes. Source dataset: CrowdHuman. Scene context: A group of people, some wearing matching white t-shirts, are walking and jogging along a paved path next to a road, with a grey SUV parked on a grassy hill in the background.",
|
| 28 |
+
"sub_caption": "walker: A man walking, wearing a plain blue hoodie, grey pants, and dark shoes.. Scene role: Standing at the edge of the sidewalk near the crosswalk, waiting to cross the street.",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.7914,
|
| 31 |
+
0.2893,
|
| 32 |
+
0.834,
|
| 33 |
+
0.4815
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 1.0,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_walker.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_walker_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_walker.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_walker_attempt_01.png",
|
| 44 |
+
"output": "references/ref_walker.png",
|
| 45 |
+
"mask": "references/sam_mask_walker.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
336.0,
|
| 57 |
+
51.0,
|
| 58 |
+
688.0,
|
| 59 |
+
1005.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.433924,
|
| 62 |
+
"mask_area_ratio": 0.16005,
|
| 63 |
+
"elapsed_seconds": 7.2846
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "traffic_light",
|
| 68 |
+
"is_person": false,
|
| 69 |
+
"subject_type": "object",
|
| 70 |
+
"source_set": "obj_set",
|
| 71 |
+
"source_image_id": "BDD100K:b6df605f-51c158b8:object:6",
|
| 72 |
+
"source_name": "traffic light",
|
| 73 |
+
"source_description": "A set of traffic lights suspended over the intersection, showing a red light. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a city street on a sunny day with a U-Haul truck in the opposite lane, parked cars along the right curb, and pedestrians crossing a crosswalk.",
|
| 74 |
+
"sub_caption": "traffic light: A set of traffic lights suspended over the intersection, showing a red light.. Scene role: Hanging high above the intersection in the upper-center of the frame.",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.4425,
|
| 77 |
+
0.023,
|
| 78 |
+
0.467,
|
| 79 |
+
0.1052
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.99,
|
| 82 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 83 |
+
"ref_image": "references/ref_traffic_light.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_traffic_light_attempt_01.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_traffic_light.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 1,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_traffic_light_attempt_01.png",
|
| 90 |
+
"output": "references/ref_traffic_light.png",
|
| 91 |
+
"mask": "references/sam_mask_traffic_light.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
160.0,
|
| 103 |
+
93.0,
|
| 104 |
+
864.0,
|
| 105 |
+
930.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.437579,
|
| 108 |
+
"mask_area_ratio": 0.253583,
|
| 109 |
+
"elapsed_seconds": 7.0663
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "delivery_truck",
|
| 114 |
+
"is_person": false,
|
| 115 |
+
"subject_type": "object",
|
| 116 |
+
"source_set": "obj_set",
|
| 117 |
+
"source_image_id": "BDD100K:b3a7b21a-48bcf2b8:object:2",
|
| 118 |
+
"source_name": "delivery truck",
|
| 119 |
+
"source_description": "A large white box truck parked behind the fence on the right, with visible green and purple logos. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a wide, paved road flanked by an industrial area with fences and delivery trucks under a cloudy sky.",
|
| 120 |
+
"sub_caption": "delivery truck: A large, plain white box delivery truck.. Scene role: Parked alongside the right curb in the background, past the intersection.",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.576,
|
| 123 |
+
0.1929,
|
| 124 |
+
0.7135,
|
| 125 |
+
0.4081
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.99,
|
| 128 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 129 |
+
"ref_image": "references/ref_delivery_truck.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_delivery_truck_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_delivery_truck.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_delivery_truck_attempt_01.png",
|
| 136 |
+
"output": "references/ref_delivery_truck.png",
|
| 137 |
+
"mask": "references/sam_mask_delivery_truck.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
95.0,
|
| 149 |
+
100.0,
|
| 150 |
+
910.0,
|
| 151 |
+
932.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 3.445823,
|
| 154 |
+
"mask_area_ratio": 0.476913,
|
| 155 |
+
"elapsed_seconds": 7.1923
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"name": "dark_parked_car",
|
| 160 |
+
"is_person": false,
|
| 161 |
+
"subject_type": "object",
|
| 162 |
+
"source_set": "obj_set",
|
| 163 |
+
"source_image_id": "BDD100K:bcd37eef-1b958ae3:object:5",
|
| 164 |
+
"source_name": "dark parked car",
|
| 165 |
+
"source_description": "Another dark-colored car parked on the right side of the street. Source dataset: BDD100K. Scene context: A nighttime view from a vehicle driving down a multi-lane city street with parked cars on the right and oncoming traffic on the left.",
|
| 166 |
+
"sub_caption": "dark parked car: A dark-colored sedan.. Scene role: Parked on the right side of the street near the sidewalk in the mid-ground.",
|
| 167 |
+
"measured_bbox": [
|
| 168 |
+
0.8414,
|
| 169 |
+
0.3717,
|
| 170 |
+
0.9967,
|
| 171 |
+
0.7454
|
| 172 |
+
],
|
| 173 |
+
"detection_confidence": 0.98,
|
| 174 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 175 |
+
"ref_image": "references/ref_dark_parked_car.png",
|
| 176 |
+
"raw_ref_image": "references/raw_ref_dark_parked_car_attempt_01.png",
|
| 177 |
+
"reference_verify": "references/reference_verify_dark_parked_car.json",
|
| 178 |
+
"reference_verify_passed": true,
|
| 179 |
+
"reference_attempts": 1,
|
| 180 |
+
"sam_white_bg": {
|
| 181 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_dark_parked_car_attempt_01.png",
|
| 182 |
+
"output": "references/ref_dark_parked_car.png",
|
| 183 |
+
"mask": "references/sam_mask_dark_parked_car.png",
|
| 184 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 185 |
+
"sam_model_type": "vit_b",
|
| 186 |
+
"sam_device": "auto",
|
| 187 |
+
"sam_working_size": [
|
| 188 |
+
640,
|
| 189 |
+
640
|
| 190 |
+
],
|
| 191 |
+
"sam_max_side": 640,
|
| 192 |
+
"sam_downscale": 0.625,
|
| 193 |
+
"prompt_box_xyxy": [
|
| 194 |
+
19.0,
|
| 195 |
+
336.0,
|
| 196 |
+
1003.0,
|
| 197 |
+
700.0
|
| 198 |
+
],
|
| 199 |
+
"mask_score": 3.408233,
|
| 200 |
+
"mask_area_ratio": 0.181406,
|
| 201 |
+
"elapsed_seconds": 8.4178
|
| 202 |
+
}
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"name": "street_lines",
|
| 206 |
+
"is_person": false,
|
| 207 |
+
"subject_type": "object",
|
| 208 |
+
"source_set": "obj_set",
|
| 209 |
+
"source_image_id": "BDD100K:c0c9ec9a-d3638a82:object:6",
|
| 210 |
+
"source_name": "street lines",
|
| 211 |
+
"source_description": "Double yellow lines separating traffic directions and white painted lines indicating lanes and crosswalks. Source dataset: BDD100K. Scene context: A view from inside a car driving on a city street, approaching an intersection underneath an elevated railway structure.",
|
| 212 |
+
"sub_caption": "street lines: Double yellow lines separating traffic directions and crisp white painted lines indicating lanes and a crosswalk.. Scene role: Painted on the asphalt road surface, extending from the foreground toward the intersection.",
|
| 213 |
+
"measured_bbox": [
|
| 214 |
+
0.003,
|
| 215 |
+
0.3541,
|
| 216 |
+
0.915,
|
| 217 |
+
0.8612
|
| 218 |
+
],
|
| 219 |
+
"detection_confidence": 0.99,
|
| 220 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 221 |
+
"ref_image": "references/ref_street_lines.png",
|
| 222 |
+
"raw_ref_image": "references/raw_ref_street_lines_attempt_01.png",
|
| 223 |
+
"reference_verify": "references/reference_verify_street_lines.json",
|
| 224 |
+
"reference_verify_passed": true,
|
| 225 |
+
"reference_attempts": 1,
|
| 226 |
+
"sam_white_bg": {
|
| 227 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_street_lines_attempt_01.png",
|
| 228 |
+
"output": "references/ref_street_lines.png",
|
| 229 |
+
"mask": "references/sam_mask_street_lines.png",
|
| 230 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 231 |
+
"sam_model_type": "vit_b",
|
| 232 |
+
"sam_device": "auto",
|
| 233 |
+
"sam_working_size": [
|
| 234 |
+
640,
|
| 235 |
+
640
|
| 236 |
+
],
|
| 237 |
+
"sam_max_side": 640,
|
| 238 |
+
"sam_downscale": 0.625,
|
| 239 |
+
"prompt_box_xyxy": [
|
| 240 |
+
38.0,
|
| 241 |
+
225.0,
|
| 242 |
+
985.0,
|
| 243 |
+
799.0
|
| 244 |
+
],
|
| 245 |
+
"mask_score": 3.287982,
|
| 246 |
+
"mask_area_ratio": 0.400985,
|
| 247 |
+
"elapsed_seconds": 7.2613
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
],
|
| 251 |
+
"not_emitted": [],
|
| 252 |
+
"model_ids": {
|
| 253 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 254 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 255 |
+
}
|
| 256 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000005.json
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000005",
|
| 3 |
+
"target_total": 6,
|
| 4 |
+
"target_people": 3,
|
| 5 |
+
"target_objects": 3,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1248,
|
| 8 |
+
832
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "3:2",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 6,
|
| 17 |
+
"n_detected": 6,
|
| 18 |
+
"n_subjects": 6,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "firefighter",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "CrowdHuman:data/data_2/282555,e0af90003451118a.jpg:person:8",
|
| 26 |
+
"source_name": "firefighter",
|
| 27 |
+
"source_description": "Wearing a dark uniform with yellow reflective stripes and a white helmet, standing facing away near the fire truck. Source dataset: CrowdHuman. Scene context: Emergency response personnel, including firefighters and ambulance crew, are gathered outside a large classical building with pillars and banners, accompanied by emergency vehicles.",
|
| 28 |
+
"sub_caption": "firefighter: Wearing a dark uniform with yellow reflective stripes and a white helmet, standing facing away.. Scene role: Assisting with incident management, positioned near the stopped car and barrier.",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.2626,
|
| 31 |
+
0.3463,
|
| 32 |
+
0.3289,
|
| 33 |
+
0.6561
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 0.99,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_firefighter.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_firefighter_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_firefighter.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_firefighter_attempt_01.png",
|
| 44 |
+
"output": "references/ref_firefighter.png",
|
| 45 |
+
"mask": "references/sam_mask_firefighter.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
317.0,
|
| 57 |
+
34.0,
|
| 58 |
+
709.0,
|
| 59 |
+
1009.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.445343,
|
| 62 |
+
"mask_area_ratio": 0.178691,
|
| 63 |
+
"elapsed_seconds": 7.0362
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "uniformed_officer",
|
| 68 |
+
"is_person": true,
|
| 69 |
+
"subject_type": "person",
|
| 70 |
+
"source_set": "people_set",
|
| 71 |
+
"source_image_id": "CrowdHuman:data/data_1/273275,f68c20007e0bf148.jpg:person:3",
|
| 72 |
+
"source_name": "uniformed officer",
|
| 73 |
+
"source_description": "wearing a khaki uniform and helmet, holding a baton, looking towards the left Source dataset: CrowdHuman. Scene context: A large crowd of people, including some in uniform with batons and helmets, stands in front of a red and yellow building.",
|
| 74 |
+
"sub_caption": "uniformed officer: Wearing a khaki uniform and helmet, holding a baton, looking towards the left.. Scene role: Directing surrounding traffic away from the stopped vehicle using a baton.",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.0497,
|
| 77 |
+
0.3566,
|
| 78 |
+
0.1691,
|
| 79 |
+
0.6118
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.99,
|
| 82 |
+
"ref_style": "white_bg_full_body_front",
|
| 83 |
+
"ref_image": "references/ref_uniformed_officer.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_uniformed_officer_attempt_01.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_uniformed_officer.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 1,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_uniformed_officer_attempt_01.png",
|
| 90 |
+
"output": "references/ref_uniformed_officer.png",
|
| 91 |
+
"mask": "references/sam_mask_uniformed_officer.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
331.0,
|
| 103 |
+
24.0,
|
| 104 |
+
689.0,
|
| 105 |
+
1005.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.475629,
|
| 108 |
+
"mask_area_ratio": 0.156165,
|
| 109 |
+
"elapsed_seconds": 7.0984
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "bystander_in_suit",
|
| 114 |
+
"is_person": true,
|
| 115 |
+
"subject_type": "person",
|
| 116 |
+
"source_set": "people_set",
|
| 117 |
+
"source_image_id": "CrowdHuman:data/data_28/273278,b62280001bda6f1a.jpg:person:19",
|
| 118 |
+
"source_name": "crowd member",
|
| 119 |
+
"source_description": "A person far right in the background wearing a suit. Source dataset: CrowdHuman. Scene context: A crowd of people gathers in front of an old stone building with a prominent arched doorway and two large animal statues.",
|
| 120 |
+
"sub_caption": "crowd member: A person wearing a suit.. Scene role: Standing off to the right side of the road behind the barrier, acting as the driver or an involved pedestrian.",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.7467,
|
| 123 |
+
0.3318,
|
| 124 |
+
0.8036,
|
| 125 |
+
0.5111
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.98,
|
| 128 |
+
"ref_style": "white_bg_full_body_front",
|
| 129 |
+
"ref_image": "references/ref_bystander_in_suit.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_bystander_in_suit_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_bystander_in_suit.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_bystander_in_suit_attempt_01.png",
|
| 136 |
+
"output": "references/ref_bystander_in_suit.png",
|
| 137 |
+
"mask": "references/sam_mask_bystander_in_suit.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
341.0,
|
| 149 |
+
59.0,
|
| 150 |
+
677.0,
|
| 151 |
+
996.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 3.480669,
|
| 154 |
+
"mask_area_ratio": 0.144797,
|
| 155 |
+
"elapsed_seconds": 7.0242
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"name": "traffic_light",
|
| 160 |
+
"is_person": false,
|
| 161 |
+
"subject_type": "object",
|
| 162 |
+
"source_set": "obj_set",
|
| 163 |
+
"source_image_id": "CrowdHuman:data/data_36/273275,6a11d000f52c34a9.jpg:object:0",
|
| 164 |
+
"source_name": "traffic light",
|
| 165 |
+
"source_description": "A black multi-lens traffic light fixture mounted on a pole above the street. Source dataset: CrowdHuman. Scene context: A male tour guide is speaking to a group of people standing on a city sidewalk next to a road crossing.",
|
| 166 |
+
"sub_caption": "traffic light: A black multi-lens traffic light fixture mounted on a pole above the street.. Scene role: Hanging overhead or mounted prominently on a pole at the intersection.",
|
| 167 |
+
"measured_bbox": [
|
| 168 |
+
0.5381,
|
| 169 |
+
0.0316,
|
| 170 |
+
0.5856,
|
| 171 |
+
0.2076
|
| 172 |
+
],
|
| 173 |
+
"detection_confidence": 0.99,
|
| 174 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 175 |
+
"ref_image": "references/ref_traffic_light.png",
|
| 176 |
+
"raw_ref_image": "references/raw_ref_traffic_light_attempt_01.png",
|
| 177 |
+
"reference_verify": "references/reference_verify_traffic_light.json",
|
| 178 |
+
"reference_verify_passed": true,
|
| 179 |
+
"reference_attempts": 1,
|
| 180 |
+
"sam_white_bg": {
|
| 181 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_traffic_light_attempt_01.png",
|
| 182 |
+
"output": "references/ref_traffic_light.png",
|
| 183 |
+
"mask": "references/sam_mask_traffic_light.png",
|
| 184 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 185 |
+
"sam_model_type": "vit_b",
|
| 186 |
+
"sam_device": "auto",
|
| 187 |
+
"sam_working_size": [
|
| 188 |
+
640,
|
| 189 |
+
640
|
| 190 |
+
],
|
| 191 |
+
"sam_max_side": 640,
|
| 192 |
+
"sam_downscale": 0.625,
|
| 193 |
+
"prompt_box_xyxy": [
|
| 194 |
+
272.0,
|
| 195 |
+
15.0,
|
| 196 |
+
750.0,
|
| 197 |
+
1006.0
|
| 198 |
+
],
|
| 199 |
+
"mask_score": 3.448339,
|
| 200 |
+
"mask_area_ratio": 0.303974,
|
| 201 |
+
"elapsed_seconds": 8.3734
|
| 202 |
+
}
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"name": "concrete_barrier",
|
| 206 |
+
"is_person": false,
|
| 207 |
+
"subject_type": "object",
|
| 208 |
+
"source_set": "obj_set",
|
| 209 |
+
"source_image_id": "BDD100K:c946c532-07177e0a:object:11",
|
| 210 |
+
"source_name": "concrete barrier",
|
| 211 |
+
"source_description": "A continuous low concrete wall acting as a barrier on the right side of the road. Source dataset: BDD100K. Scene context: View from inside a vehicle driving on a multi-lane highway during the day, with construction or industrial sites visible alongside.",
|
| 212 |
+
"sub_caption": "concrete barrier: A continuous low concrete wall acting as a barrier on the right side of the road.. Scene role: Lining the right side of the street, separating the pedestrian walkway or construction zone from the active traffic lane.",
|
| 213 |
+
"measured_bbox": [
|
| 214 |
+
0.6322,
|
| 215 |
+
0.4972,
|
| 216 |
+
0.9964,
|
| 217 |
+
0.6985
|
| 218 |
+
],
|
| 219 |
+
"detection_confidence": 0.99,
|
| 220 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 221 |
+
"ref_image": "references/ref_concrete_barrier.png",
|
| 222 |
+
"raw_ref_image": "references/raw_ref_concrete_barrier_attempt_01.png",
|
| 223 |
+
"reference_verify": "references/reference_verify_concrete_barrier.json",
|
| 224 |
+
"reference_verify_passed": true,
|
| 225 |
+
"reference_attempts": 1,
|
| 226 |
+
"sam_white_bg": {
|
| 227 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_concrete_barrier_attempt_01.png",
|
| 228 |
+
"output": "references/ref_concrete_barrier.png",
|
| 229 |
+
"mask": "references/sam_mask_concrete_barrier.png",
|
| 230 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 231 |
+
"sam_model_type": "vit_b",
|
| 232 |
+
"sam_device": "auto",
|
| 233 |
+
"sam_working_size": [
|
| 234 |
+
640,
|
| 235 |
+
640
|
| 236 |
+
],
|
| 237 |
+
"sam_max_side": 640,
|
| 238 |
+
"sam_downscale": 0.625,
|
| 239 |
+
"prompt_box_xyxy": [
|
| 240 |
+
53.0,
|
| 241 |
+
219.0,
|
| 242 |
+
970.0,
|
| 243 |
+
811.0
|
| 244 |
+
],
|
| 245 |
+
"mask_score": 3.469119,
|
| 246 |
+
"mask_area_ratio": 0.3653,
|
| 247 |
+
"elapsed_seconds": 7.0274
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"name": "silver_car",
|
| 252 |
+
"is_person": false,
|
| 253 |
+
"subject_type": "object",
|
| 254 |
+
"source_set": "obj_set",
|
| 255 |
+
"source_image_id": "BDD100K:be3d3a81-326a032d:object:0",
|
| 256 |
+
"source_name": "silver car",
|
| 257 |
+
"source_description": "A silver compact hatchback car facing forward, waiting at an intersection. Its brake lights are on. Source dataset: BDD100K. Scene context: A rainy street scene showing cars waiting at an intersection surrounded by tall buildings and urban infrastructure.",
|
| 258 |
+
"sub_caption": "silver car: A silver compact hatchback car facing forward, waiting at an intersection with illuminated brake lights.. Scene role: Stopped in the active lane near the barrier, serving as the focal point of the traffic response.",
|
| 259 |
+
"measured_bbox": [
|
| 260 |
+
0.3396,
|
| 261 |
+
0.3754,
|
| 262 |
+
0.6399,
|
| 263 |
+
0.6647
|
| 264 |
+
],
|
| 265 |
+
"detection_confidence": 0.99,
|
| 266 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 267 |
+
"ref_image": "references/ref_silver_car.png",
|
| 268 |
+
"raw_ref_image": "references/raw_ref_silver_car_attempt_01.png",
|
| 269 |
+
"reference_verify": "references/reference_verify_silver_car.json",
|
| 270 |
+
"reference_verify_passed": true,
|
| 271 |
+
"reference_attempts": 1,
|
| 272 |
+
"sam_white_bg": {
|
| 273 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_silver_car_attempt_01.png",
|
| 274 |
+
"output": "references/ref_silver_car.png",
|
| 275 |
+
"mask": "references/sam_mask_silver_car.png",
|
| 276 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 277 |
+
"sam_model_type": "vit_b",
|
| 278 |
+
"sam_device": "auto",
|
| 279 |
+
"sam_working_size": [
|
| 280 |
+
640,
|
| 281 |
+
640
|
| 282 |
+
],
|
| 283 |
+
"sam_max_side": 640,
|
| 284 |
+
"sam_downscale": 0.625,
|
| 285 |
+
"prompt_box_xyxy": [
|
| 286 |
+
46.0,
|
| 287 |
+
215.0,
|
| 288 |
+
976.0,
|
| 289 |
+
829.0
|
| 290 |
+
],
|
| 291 |
+
"mask_score": 3.457698,
|
| 292 |
+
"mask_area_ratio": 0.330622,
|
| 293 |
+
"elapsed_seconds": 7.0933
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
],
|
| 297 |
+
"not_emitted": [],
|
| 298 |
+
"model_ids": {
|
| 299 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 300 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 301 |
+
}
|
| 302 |
+
}
|
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000006.json
ADDED
|
@@ -0,0 +1,394 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sample_id": "sample_000006",
|
| 3 |
+
"target_total": 8,
|
| 4 |
+
"target_people": 1,
|
| 5 |
+
"target_objects": 7,
|
| 6 |
+
"canvas_size": [
|
| 7 |
+
1280,
|
| 8 |
+
720
|
| 9 |
+
],
|
| 10 |
+
"canvas_aspect_ratio": "16:9",
|
| 11 |
+
"main_image": "main_image.png",
|
| 12 |
+
"bbox_overlay": "bbox_overlay.png",
|
| 13 |
+
"plan": "plan.json",
|
| 14 |
+
"detections": "detections.json",
|
| 15 |
+
"vocab_task": "vocab_task.json",
|
| 16 |
+
"n_planned": 8,
|
| 17 |
+
"n_detected": 8,
|
| 18 |
+
"n_subjects": 8,
|
| 19 |
+
"subjects": [
|
| 20 |
+
{
|
| 21 |
+
"name": "distant_pedestrian",
|
| 22 |
+
"is_person": true,
|
| 23 |
+
"subject_type": "person",
|
| 24 |
+
"source_set": "people_set",
|
| 25 |
+
"source_image_id": "CrowdHuman:data/data_73/283991,17cd800008079067.jpg:person:18",
|
| 26 |
+
"source_name": "pedestrian",
|
| 27 |
+
"source_description": "Another person in the distant background near the green structure. Source dataset: CrowdHuman. Scene context: A large crowd of people walking across a street with trees, streetlamps, and classic architecture in the background.",
|
| 28 |
+
"sub_caption": "pedestrian: A person walking across the street in the distant background.. Scene role: Crossing the crosswalk in the distance ahead of the approaching vehicles.",
|
| 29 |
+
"measured_bbox": [
|
| 30 |
+
0.3877,
|
| 31 |
+
0.478,
|
| 32 |
+
0.4204,
|
| 33 |
+
0.5881
|
| 34 |
+
],
|
| 35 |
+
"detection_confidence": 0.9,
|
| 36 |
+
"ref_style": "white_bg_full_body_front",
|
| 37 |
+
"ref_image": "references/ref_distant_pedestrian.png",
|
| 38 |
+
"raw_ref_image": "references/raw_ref_distant_pedestrian_attempt_01.png",
|
| 39 |
+
"reference_verify": "references/reference_verify_distant_pedestrian.json",
|
| 40 |
+
"reference_verify_passed": true,
|
| 41 |
+
"reference_attempts": 1,
|
| 42 |
+
"sam_white_bg": {
|
| 43 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_distant_pedestrian_attempt_01.png",
|
| 44 |
+
"output": "references/ref_distant_pedestrian.png",
|
| 45 |
+
"mask": "references/sam_mask_distant_pedestrian.png",
|
| 46 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 47 |
+
"sam_model_type": "vit_b",
|
| 48 |
+
"sam_device": "auto",
|
| 49 |
+
"sam_working_size": [
|
| 50 |
+
640,
|
| 51 |
+
640
|
| 52 |
+
],
|
| 53 |
+
"sam_max_side": 640,
|
| 54 |
+
"sam_downscale": 0.625,
|
| 55 |
+
"prompt_box_xyxy": [
|
| 56 |
+
324.0,
|
| 57 |
+
9.0,
|
| 58 |
+
705.0,
|
| 59 |
+
1015.0
|
| 60 |
+
],
|
| 61 |
+
"mask_score": 3.338419,
|
| 62 |
+
"mask_area_ratio": 0.174056,
|
| 63 |
+
"elapsed_seconds": 8.694
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "vertical_illuminated_sign",
|
| 68 |
+
"is_person": false,
|
| 69 |
+
"subject_type": "object",
|
| 70 |
+
"source_set": "obj_set",
|
| 71 |
+
"source_image_id": "BDD100K:bb1b7e42-9608265e:object:6",
|
| 72 |
+
"source_name": "street sign",
|
| 73 |
+
"source_description": "A vertical 'PARK' sign illuminated on the right side of the street, indicating a parking garage. Source dataset: BDD100K. Scene context: A dashcam view from a vehicle driving down a city street with tall buildings on both sides, following a yellow taxi, with other cars parked and driving.",
|
| 74 |
+
"sub_caption": "street sign: A vertical illuminated neon sign with abstract shapes, glowing brightly.. Scene role: Mounted on the building facade on the right side of the street, adding ambient night lighting.",
|
| 75 |
+
"measured_bbox": [
|
| 76 |
+
0.7683,
|
| 77 |
+
0.0355,
|
| 78 |
+
0.8177,
|
| 79 |
+
0.2837
|
| 80 |
+
],
|
| 81 |
+
"detection_confidence": 0.99,
|
| 82 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 83 |
+
"ref_image": "references/ref_vertical_illuminated_sign.png",
|
| 84 |
+
"raw_ref_image": "references/raw_ref_vertical_illuminated_sign_attempt_01.png",
|
| 85 |
+
"reference_verify": "references/reference_verify_vertical_illuminated_sign.json",
|
| 86 |
+
"reference_verify_passed": true,
|
| 87 |
+
"reference_attempts": 1,
|
| 88 |
+
"sam_white_bg": {
|
| 89 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_vertical_illuminated_sign_attempt_01.png",
|
| 90 |
+
"output": "references/ref_vertical_illuminated_sign.png",
|
| 91 |
+
"mask": "references/sam_mask_vertical_illuminated_sign.png",
|
| 92 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 93 |
+
"sam_model_type": "vit_b",
|
| 94 |
+
"sam_device": "auto",
|
| 95 |
+
"sam_working_size": [
|
| 96 |
+
640,
|
| 97 |
+
640
|
| 98 |
+
],
|
| 99 |
+
"sam_max_side": 640,
|
| 100 |
+
"sam_downscale": 0.625,
|
| 101 |
+
"prompt_box_xyxy": [
|
| 102 |
+
305.0,
|
| 103 |
+
20.0,
|
| 104 |
+
728.0,
|
| 105 |
+
1002.0
|
| 106 |
+
],
|
| 107 |
+
"mask_score": 3.37343,
|
| 108 |
+
"mask_area_ratio": 0.273593,
|
| 109 |
+
"elapsed_seconds": 7.1332
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "emergency_vehicle",
|
| 114 |
+
"is_person": false,
|
| 115 |
+
"subject_type": "object",
|
| 116 |
+
"source_set": "obj_set",
|
| 117 |
+
"source_image_id": "BDD100K:b99f250d-886111c5:object:5",
|
| 118 |
+
"source_name": "vehicle",
|
| 119 |
+
"source_description": "A dark-colored vehicle partially visible in the left background with blue emergency lights flashing. Source dataset: BDD100K. Scene context: A nighttime city street intersection showing a crosswalk, construction barriers, and illuminated traffic signals.",
|
| 120 |
+
"sub_caption": "vehicle: A dark-colored vehicle with bright blue emergency lights flashing.. Scene role: Parked on the left side of the street near the intersection.",
|
| 121 |
+
"measured_bbox": [
|
| 122 |
+
0.1031,
|
| 123 |
+
0.4564,
|
| 124 |
+
0.2827,
|
| 125 |
+
0.6497
|
| 126 |
+
],
|
| 127 |
+
"detection_confidence": 0.95,
|
| 128 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 129 |
+
"ref_image": "references/ref_emergency_vehicle.png",
|
| 130 |
+
"raw_ref_image": "references/raw_ref_emergency_vehicle_attempt_01.png",
|
| 131 |
+
"reference_verify": "references/reference_verify_emergency_vehicle.json",
|
| 132 |
+
"reference_verify_passed": true,
|
| 133 |
+
"reference_attempts": 1,
|
| 134 |
+
"sam_white_bg": {
|
| 135 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_emergency_vehicle_attempt_01.png",
|
| 136 |
+
"output": "references/ref_emergency_vehicle.png",
|
| 137 |
+
"mask": "references/sam_mask_emergency_vehicle.png",
|
| 138 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 139 |
+
"sam_model_type": "vit_b",
|
| 140 |
+
"sam_device": "auto",
|
| 141 |
+
"sam_working_size": [
|
| 142 |
+
640,
|
| 143 |
+
640
|
| 144 |
+
],
|
| 145 |
+
"sam_max_side": 640,
|
| 146 |
+
"sam_downscale": 0.625,
|
| 147 |
+
"prompt_box_xyxy": [
|
| 148 |
+
8.0,
|
| 149 |
+
237.0,
|
| 150 |
+
1015.0,
|
| 151 |
+
828.0
|
| 152 |
+
],
|
| 153 |
+
"mask_score": 3.468468,
|
| 154 |
+
"mask_area_ratio": 0.355034,
|
| 155 |
+
"elapsed_seconds": 7.0896
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"name": "white_panel_van",
|
| 160 |
+
"is_person": false,
|
| 161 |
+
"subject_type": "object",
|
| 162 |
+
"source_set": "obj_set",
|
| 163 |
+
"source_image_id": "BDD100K:b5047c50-e1facff6:object:2",
|
| 164 |
+
"source_name": "white van",
|
| 165 |
+
"source_description": "A large white panel van with red taillights illuminated, driving in the right lane ahead. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a multi-lane city street on a sunny day with moderate traffic.",
|
| 166 |
+
"sub_caption": "white van: A large white panel van with red taillights illuminated.. Scene role: Driving in the lane directly ahead of the camera perspective.",
|
| 167 |
+
"measured_bbox": [
|
| 168 |
+
0.4556,
|
| 169 |
+
0.3288,
|
| 170 |
+
0.5926,
|
| 171 |
+
0.6597
|
| 172 |
+
],
|
| 173 |
+
"detection_confidence": 0.99,
|
| 174 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 175 |
+
"ref_image": "references/ref_white_panel_van.png",
|
| 176 |
+
"raw_ref_image": "references/raw_ref_white_panel_van_attempt_01.png",
|
| 177 |
+
"reference_verify": "references/reference_verify_white_panel_van.json",
|
| 178 |
+
"reference_verify_passed": true,
|
| 179 |
+
"reference_attempts": 1,
|
| 180 |
+
"sam_white_bg": {
|
| 181 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_white_panel_van_attempt_01.png",
|
| 182 |
+
"output": "references/ref_white_panel_van.png",
|
| 183 |
+
"mask": "references/sam_mask_white_panel_van.png",
|
| 184 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 185 |
+
"sam_model_type": "vit_b",
|
| 186 |
+
"sam_device": "auto",
|
| 187 |
+
"sam_working_size": [
|
| 188 |
+
640,
|
| 189 |
+
640
|
| 190 |
+
],
|
| 191 |
+
"sam_max_side": 640,
|
| 192 |
+
"sam_downscale": 0.625,
|
| 193 |
+
"prompt_box_xyxy": [
|
| 194 |
+
181.0,
|
| 195 |
+
63.0,
|
| 196 |
+
843.0,
|
| 197 |
+
937.0
|
| 198 |
+
],
|
| 199 |
+
"mask_score": 2.636854,
|
| 200 |
+
"mask_area_ratio": 0.376409,
|
| 201 |
+
"elapsed_seconds": 7.1379
|
| 202 |
+
}
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"name": "double_solid_line",
|
| 206 |
+
"is_person": false,
|
| 207 |
+
"subject_type": "object",
|
| 208 |
+
"source_set": "obj_set",
|
| 209 |
+
"source_image_id": "BDD100K:bc886d37-5b22c313:object:7",
|
| 210 |
+
"source_name": "double solid white line",
|
| 211 |
+
"source_description": "Two continuous white painted lines on the dark asphalt road surface, separating the two lanes of traffic. Source dataset: BDD100K. Scene context: A view from inside a car driving through a brightly lit tunnel with tiled walls, following a silver SUV and a dark compact car.",
|
| 212 |
+
"sub_caption": "double solid white line: Two continuous white painted lines on the dark asphalt road surface.. Scene role: Separating the traffic lanes on the dark road, leading toward the intersection.",
|
| 213 |
+
"measured_bbox": [
|
| 214 |
+
0.1922,
|
| 215 |
+
0.6133,
|
| 216 |
+
0.4541,
|
| 217 |
+
1.0
|
| 218 |
+
],
|
| 219 |
+
"detection_confidence": 0.95,
|
| 220 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 221 |
+
"ref_image": "references/ref_double_solid_line.png",
|
| 222 |
+
"raw_ref_image": "references/raw_ref_double_solid_line_attempt_01.png",
|
| 223 |
+
"reference_verify": "references/reference_verify_double_solid_line.json",
|
| 224 |
+
"reference_verify_passed": true,
|
| 225 |
+
"reference_attempts": 1,
|
| 226 |
+
"sam_white_bg": {
|
| 227 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_double_solid_line_attempt_01.png",
|
| 228 |
+
"output": "references/ref_double_solid_line.png",
|
| 229 |
+
"mask": "references/sam_mask_double_solid_line.png",
|
| 230 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 231 |
+
"sam_model_type": "vit_b",
|
| 232 |
+
"sam_device": "auto",
|
| 233 |
+
"sam_working_size": [
|
| 234 |
+
640,
|
| 235 |
+
640
|
| 236 |
+
],
|
| 237 |
+
"sam_max_side": 640,
|
| 238 |
+
"sam_downscale": 0.625,
|
| 239 |
+
"prompt_box_xyxy": [
|
| 240 |
+
22.0,
|
| 241 |
+
186.0,
|
| 242 |
+
1001.0,
|
| 243 |
+
837.0
|
| 244 |
+
],
|
| 245 |
+
"mask_score": 3.460181,
|
| 246 |
+
"mask_area_ratio": 0.372935,
|
| 247 |
+
"elapsed_seconds": 8.3174
|
| 248 |
+
}
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"name": "dark_building_facade",
|
| 252 |
+
"is_person": false,
|
| 253 |
+
"subject_type": "object",
|
| 254 |
+
"source_set": "obj_set",
|
| 255 |
+
"source_image_id": "BDD100K:c807cb19-7e09cb11:object:8",
|
| 256 |
+
"source_name": "building facade",
|
| 257 |
+
"source_description": "Dark outlines of buildings lining the street on both sides, with some lit windows. Source dataset: BDD100K. Scene context: Nighttime driving view on a multi-lane city street with traffic lights and vehicles ahead.",
|
| 258 |
+
"sub_caption": "building facade: Dark outlines of buildings with scattered, warm-toned lit windows.. Scene role: Forming the urban backdrop along the left side of the street.",
|
| 259 |
+
"measured_bbox": [
|
| 260 |
+
0.1397,
|
| 261 |
+
0.0,
|
| 262 |
+
0.366,
|
| 263 |
+
0.5427
|
| 264 |
+
],
|
| 265 |
+
"detection_confidence": 0.8,
|
| 266 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 267 |
+
"ref_image": "references/ref_dark_building_facade.png",
|
| 268 |
+
"raw_ref_image": "references/raw_ref_dark_building_facade_attempt_01.png",
|
| 269 |
+
"reference_verify": "references/reference_verify_dark_building_facade.json",
|
| 270 |
+
"reference_verify_passed": true,
|
| 271 |
+
"reference_attempts": 1,
|
| 272 |
+
"sam_white_bg": {
|
| 273 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_dark_building_facade_attempt_01.png",
|
| 274 |
+
"output": "references/ref_dark_building_facade.png",
|
| 275 |
+
"mask": "references/sam_mask_dark_building_facade.png",
|
| 276 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 277 |
+
"sam_model_type": "vit_b",
|
| 278 |
+
"sam_device": "auto",
|
| 279 |
+
"sam_working_size": [
|
| 280 |
+
640,
|
| 281 |
+
640
|
| 282 |
+
],
|
| 283 |
+
"sam_max_side": 640,
|
| 284 |
+
"sam_downscale": 0.625,
|
| 285 |
+
"prompt_box_xyxy": [
|
| 286 |
+
128.0,
|
| 287 |
+
0.0,
|
| 288 |
+
887.0,
|
| 289 |
+
1000.0
|
| 290 |
+
],
|
| 291 |
+
"mask_score": 2.829968,
|
| 292 |
+
"mask_area_ratio": 0.624767,
|
| 293 |
+
"elapsed_seconds": 7.1675
|
| 294 |
+
}
|
| 295 |
+
},
|
| 296 |
+
{
|
| 297 |
+
"name": "awning_building_corner",
|
| 298 |
+
"is_person": false,
|
| 299 |
+
"subject_type": "object",
|
| 300 |
+
"source_set": "obj_set",
|
| 301 |
+
"source_image_id": "BDD100K:c06d23aa-cb9ae751:object:6",
|
| 302 |
+
"source_name": "building corner",
|
| 303 |
+
"source_description": "The corner of a building on the right side, with an orange or red awning and some lit signs. Source dataset: BDD100K. Scene context: Nighttime driving scene at an intersection with a stop sign and a large black SUV passing on the right.",
|
| 304 |
+
"sub_caption": "building corner: The corner of a building featuring an awning and brightly lit abstract signboards.. Scene role: Anchoring the right side of the intersection with a warm architectural glow.",
|
| 305 |
+
"measured_bbox": [
|
| 306 |
+
0.6102,
|
| 307 |
+
0.3347,
|
| 308 |
+
0.7867,
|
| 309 |
+
0.5412
|
| 310 |
+
],
|
| 311 |
+
"detection_confidence": 0.9,
|
| 312 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 313 |
+
"ref_image": "references/ref_awning_building_corner.png",
|
| 314 |
+
"raw_ref_image": "references/raw_ref_awning_building_corner_attempt_01.png",
|
| 315 |
+
"reference_verify": "references/reference_verify_awning_building_corner.json",
|
| 316 |
+
"reference_verify_passed": true,
|
| 317 |
+
"reference_attempts": 1,
|
| 318 |
+
"sam_white_bg": {
|
| 319 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_awning_building_corner_attempt_01.png",
|
| 320 |
+
"output": "references/ref_awning_building_corner.png",
|
| 321 |
+
"mask": "references/sam_mask_awning_building_corner.png",
|
| 322 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 323 |
+
"sam_model_type": "vit_b",
|
| 324 |
+
"sam_device": "auto",
|
| 325 |
+
"sam_working_size": [
|
| 326 |
+
640,
|
| 327 |
+
640
|
| 328 |
+
],
|
| 329 |
+
"sam_max_side": 640,
|
| 330 |
+
"sam_downscale": 0.625,
|
| 331 |
+
"prompt_box_xyxy": [
|
| 332 |
+
27.0,
|
| 333 |
+
27.0,
|
| 334 |
+
975.0,
|
| 335 |
+
980.0
|
| 336 |
+
],
|
| 337 |
+
"mask_score": 3.458235,
|
| 338 |
+
"mask_area_ratio": 0.594922,
|
| 339 |
+
"elapsed_seconds": 7.3072
|
| 340 |
+
}
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"name": "green_street_sign",
|
| 344 |
+
"is_person": false,
|
| 345 |
+
"subject_type": "object",
|
| 346 |
+
"source_set": "obj_set",
|
| 347 |
+
"source_image_id": "CrowdHuman:data/data_50/273278,febe100057ca94db.jpg:object:5",
|
| 348 |
+
"source_name": "street sign",
|
| 349 |
+
"source_description": "A green street sign with white text visible on the left side. Source dataset: CrowdHuman. Scene context: Several pedestrians are crossing a street at a crosswalk on a sunny day.",
|
| 350 |
+
"sub_caption": "street sign: A standard green street sign without any readable text.. Scene role: Hanging from a traffic light pole near the intersection.",
|
| 351 |
+
"measured_bbox": [
|
| 352 |
+
0.5754,
|
| 353 |
+
0.1583,
|
| 354 |
+
0.6522,
|
| 355 |
+
0.1884
|
| 356 |
+
],
|
| 357 |
+
"detection_confidence": 100,
|
| 358 |
+
"ref_style": "white_bg_encyclopedia_photo",
|
| 359 |
+
"ref_image": "references/ref_green_street_sign.png",
|
| 360 |
+
"raw_ref_image": "references/raw_ref_green_street_sign_attempt_01.png",
|
| 361 |
+
"reference_verify": "references/reference_verify_green_street_sign.json",
|
| 362 |
+
"reference_verify_passed": true,
|
| 363 |
+
"reference_attempts": 1,
|
| 364 |
+
"sam_white_bg": {
|
| 365 |
+
"input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_green_street_sign_attempt_01.png",
|
| 366 |
+
"output": "references/ref_green_street_sign.png",
|
| 367 |
+
"mask": "references/sam_mask_green_street_sign.png",
|
| 368 |
+
"sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
|
| 369 |
+
"sam_model_type": "vit_b",
|
| 370 |
+
"sam_device": "auto",
|
| 371 |
+
"sam_working_size": [
|
| 372 |
+
640,
|
| 373 |
+
640
|
| 374 |
+
],
|
| 375 |
+
"sam_max_side": 640,
|
| 376 |
+
"sam_downscale": 0.625,
|
| 377 |
+
"prompt_box_xyxy": [
|
| 378 |
+
61.0,
|
| 379 |
+
378.0,
|
| 380 |
+
962.0,
|
| 381 |
+
645.0
|
| 382 |
+
],
|
| 383 |
+
"mask_score": 3.379525,
|
| 384 |
+
"mask_area_ratio": 0.536634,
|
| 385 |
+
"elapsed_seconds": 7.1734
|
| 386 |
+
}
|
| 387 |
+
}
|
| 388 |
+
],
|
| 389 |
+
"not_emitted": [],
|
| 390 |
+
"model_ids": {
|
| 391 |
+
"chat_model": "gcp/google/gemini-3.1-pro-preview",
|
| 392 |
+
"image_model": "gcp/google/gemini-3-pro-image-preview"
|
| 393 |
+
}
|
| 394 |
+
}
|