Upload folder using huggingface_hub

#3
by kimi000 - opened
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +170 -0
  2. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/README.md +16 -0
  3. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.json +0 -0
  4. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.jsonl +0 -0
  5. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000001.json +16 -0
  6. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000002.json +16 -0
  7. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000003.json +16 -0
  8. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000004.json +16 -0
  9. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000005.json +16 -0
  10. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000006.json +16 -0
  11. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000007.json +16 -0
  12. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000008.json +22 -0
  13. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000009.json +16 -0
  14. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000010.json +16 -0
  15. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000001.json +4 -0
  16. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000002.json +4 -0
  17. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000003.json +4 -0
  18. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000004.json +4 -0
  19. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000005.json +4 -0
  20. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000006.json +4 -0
  21. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000007.json +4 -0
  22. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000008.json +4 -0
  23. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000009.json +4 -0
  24. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000010.json +4 -0
  25. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000001.json +14 -0
  26. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000002.json +14 -0
  27. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000003.json +14 -0
  28. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000004.json +14 -0
  29. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000005.json +14 -0
  30. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000006.json +14 -0
  31. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000007.json +14 -0
  32. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000008.json +14 -0
  33. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000009.json +14 -0
  34. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000010.json +14 -0
  35. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000001.json +18 -0
  36. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000002.json +18 -0
  37. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000003.json +18 -0
  38. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000004.json +18 -0
  39. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000005.json +18 -0
  40. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000006.json +18 -0
  41. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000007.json +18 -0
  42. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000008.json +18 -0
  43. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000009.json +18 -0
  44. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000010.json +18 -0
  45. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000001.json +164 -0
  46. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000002.json +716 -0
  47. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000003.json +164 -0
  48. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000004.json +256 -0
  49. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000005.json +302 -0
  50. samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000006.json +394 -0
.gitattributes CHANGED
@@ -52,3 +52,173 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
52
  10samples/sample_0007/overlays/overlay_accepted.png filter=lfs diff=lfs merge=lfs -text
53
  10samples/sample_0007/overlays/overlay_intended.png filter=lfs diff=lfs merge=lfs -text
54
  10samples/sample_0007/overlays/overlay_measured.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  10samples/sample_0007/overlays/overlay_accepted.png filter=lfs diff=lfs merge=lfs -text
53
  10samples/sample_0007/overlays/overlay_intended.png filter=lfs diff=lfs merge=lfs -text
54
  10samples/sample_0007/overlays/overlay_measured.png filter=lfs diff=lfs merge=lfs -text
55
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
56
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
57
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
58
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/detect_refine_pedestrian.png filter=lfs diff=lfs merge=lfs -text
59
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
60
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
61
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/crops/diversify_input_pedestrian.png filter=lfs diff=lfs merge=lfs -text
62
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/main_image.png filter=lfs diff=lfs merge=lfs -text
63
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_metal_barrier.png filter=lfs diff=lfs merge=lfs -text
64
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_parked_dark_car.png filter=lfs diff=lfs merge=lfs -text
65
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000001/references/ref_pedestrian.png filter=lfs diff=lfs merge=lfs -text
66
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
67
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_city_buildings.png filter=lfs diff=lfs merge=lfs -text
68
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_street_light.png filter=lfs diff=lfs merge=lfs -text
69
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_street_trees.png filter=lfs diff=lfs merge=lfs -text
70
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
71
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/detect_refine_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
72
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_city_buildings.png filter=lfs diff=lfs merge=lfs -text
73
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_street_light.png filter=lfs diff=lfs merge=lfs -text
74
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_street_trees.png filter=lfs diff=lfs merge=lfs -text
75
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
76
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/crops/diversify_input_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
77
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/main_image.png filter=lfs diff=lfs merge=lfs -text
78
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_city_buildings.png filter=lfs diff=lfs merge=lfs -text
79
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_drainage_grate.png filter=lfs diff=lfs merge=lfs -text
80
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_parked_car_left.png filter=lfs diff=lfs merge=lfs -text
81
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_parked_suv_right.png filter=lfs diff=lfs merge=lfs -text
82
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_pedestrian_walking.png filter=lfs diff=lfs merge=lfs -text
83
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_shop_pedestrian.png filter=lfs diff=lfs merge=lfs -text
84
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_storefront_sign.png filter=lfs diff=lfs merge=lfs -text
85
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_street_signs.png filter=lfs diff=lfs merge=lfs -text
86
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_street_trees.png filter=lfs diff=lfs merge=lfs -text
87
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_traveling_dark_suv.png filter=lfs diff=lfs merge=lfs -text
88
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_twilight_sky.png filter=lfs diff=lfs merge=lfs -text
89
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_vehicle_dashboard.png filter=lfs diff=lfs merge=lfs -text
90
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_white_car.png filter=lfs diff=lfs merge=lfs -text
91
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000002/references/ref_yellow_lines.png filter=lfs diff=lfs merge=lfs -text
92
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
93
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_black_sedan.png filter=lfs diff=lfs merge=lfs -text
94
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_silver_car.png filter=lfs diff=lfs merge=lfs -text
95
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/detect_refine_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
96
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_black_sedan.png filter=lfs diff=lfs merge=lfs -text
97
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_silver_car.png filter=lfs diff=lfs merge=lfs -text
98
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/crops/diversify_input_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
99
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/main_image.png filter=lfs diff=lfs merge=lfs -text
100
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_black_sedan.png filter=lfs diff=lfs merge=lfs -text
101
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_silver_car.png filter=lfs diff=lfs merge=lfs -text
102
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000003/references/ref_waiting_pedestrian.png filter=lfs diff=lfs merge=lfs -text
103
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
104
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/detect_refine_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
105
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/detect_refine_street_lines.png filter=lfs diff=lfs merge=lfs -text
106
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/diversify_input_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
107
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/crops/diversify_input_street_lines.png filter=lfs diff=lfs merge=lfs -text
108
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/main_image.png filter=lfs diff=lfs merge=lfs -text
109
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_dark_parked_car.png filter=lfs diff=lfs merge=lfs -text
110
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_delivery_truck.png filter=lfs diff=lfs merge=lfs -text
111
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_street_lines.png filter=lfs diff=lfs merge=lfs -text
112
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_traffic_light.png filter=lfs diff=lfs merge=lfs -text
113
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000004/references/ref_walker.png filter=lfs diff=lfs merge=lfs -text
114
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
115
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/detect_refine_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
116
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/detect_refine_silver_car.png filter=lfs diff=lfs merge=lfs -text
117
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
118
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_silver_car.png filter=lfs diff=lfs merge=lfs -text
119
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/crops/diversify_input_uniformed_officer.png filter=lfs diff=lfs merge=lfs -text
120
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/main_image.png filter=lfs diff=lfs merge=lfs -text
121
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_bystander_in_suit.png filter=lfs diff=lfs merge=lfs -text
122
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_concrete_barrier.png filter=lfs diff=lfs merge=lfs -text
123
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_firefighter.png filter=lfs diff=lfs merge=lfs -text
124
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_silver_car.png filter=lfs diff=lfs merge=lfs -text
125
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_traffic_light.png filter=lfs diff=lfs merge=lfs -text
126
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000005/references/ref_uniformed_officer.png filter=lfs diff=lfs merge=lfs -text
127
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
128
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/detect_refine_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
129
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/detect_refine_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
130
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
131
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
132
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/crops/diversify_input_white_panel_van.png filter=lfs diff=lfs merge=lfs -text
133
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/main_image.png filter=lfs diff=lfs merge=lfs -text
134
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_awning_building_corner.png filter=lfs diff=lfs merge=lfs -text
135
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_dark_building_facade.png filter=lfs diff=lfs merge=lfs -text
136
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_distant_pedestrian.png filter=lfs diff=lfs merge=lfs -text
137
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_double_solid_line.png filter=lfs diff=lfs merge=lfs -text
138
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_emergency_vehicle.png filter=lfs diff=lfs merge=lfs -text
139
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_green_street_sign.png filter=lfs diff=lfs merge=lfs -text
140
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_vertical_illuminated_sign.png filter=lfs diff=lfs merge=lfs -text
141
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000006/references/ref_white_panel_van.png filter=lfs diff=lfs merge=lfs -text
142
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
143
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_brick_building_right.png filter=lfs diff=lfs merge=lfs -text
144
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_crosswalk_markings.png filter=lfs diff=lfs merge=lfs -text
145
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_dashboard.png filter=lfs diff=lfs merge=lfs -text
146
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
147
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
148
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
149
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/detect_refine_street_light_pole.png filter=lfs diff=lfs merge=lfs -text
150
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_dashboard.png filter=lfs diff=lfs merge=lfs -text
151
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
152
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
153
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
154
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/crops/diversify_input_street_light_pole.png filter=lfs diff=lfs merge=lfs -text
155
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/main_image.png filter=lfs diff=lfs merge=lfs -text
156
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_brick_building_right.png filter=lfs diff=lfs merge=lfs -text
157
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_bunch_of_balloons.png filter=lfs diff=lfs merge=lfs -text
158
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_crosswalk_markings.png filter=lfs diff=lfs merge=lfs -text
159
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dark_car_1.png filter=lfs diff=lfs merge=lfs -text
160
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dark_car_2.png filter=lfs diff=lfs merge=lfs -text
161
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_dashboard.png filter=lfs diff=lfs merge=lfs -text
162
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_iron_balcony.png filter=lfs diff=lfs merge=lfs -text
163
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_multi_story_building_left.png filter=lfs diff=lfs merge=lfs -text
164
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_overhead_wires.png filter=lfs diff=lfs merge=lfs -text
165
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_white_garbage_bag.png filter=lfs diff=lfs merge=lfs -text
166
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_white_sedan.png filter=lfs diff=lfs merge=lfs -text
167
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000007/references/ref_woman_in_dark_dress.png filter=lfs diff=lfs merge=lfs -text
168
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
169
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
170
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
171
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/detect_refine_street_lamp.png filter=lfs diff=lfs merge=lfs -text
172
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
173
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
174
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_sign_holder.png filter=lfs diff=lfs merge=lfs -text
175
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_street_lamp.png filter=lfs diff=lfs merge=lfs -text
176
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_woman_waiting.png filter=lfs diff=lfs merge=lfs -text
177
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/crops/diversify_input_young_man_waiting.png filter=lfs diff=lfs merge=lfs -text
178
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/main_image.png filter=lfs diff=lfs merge=lfs -text
179
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_businessman_waiting.png filter=lfs diff=lfs merge=lfs -text
180
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_dashboard_reflection.png filter=lfs diff=lfs merge=lfs -text
181
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_crossing_right.png filter=lfs diff=lfs merge=lfs -text
182
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_standing.png filter=lfs diff=lfs merge=lfs -text
183
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
184
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_pedestrian_walking_away_sidewalk.png filter=lfs diff=lfs merge=lfs -text
185
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_sign_holder.png filter=lfs diff=lfs merge=lfs -text
186
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_street_lamp.png filter=lfs diff=lfs merge=lfs -text
187
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_woman_waiting.png filter=lfs diff=lfs merge=lfs -text
188
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000008/references/ref_young_man_waiting.png filter=lfs diff=lfs merge=lfs -text
189
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
190
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_metal_railing.png filter=lfs diff=lfs merge=lfs -text
191
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_overpass.png filter=lfs diff=lfs merge=lfs -text
192
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/detect_refine_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
193
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_metal_railing.png filter=lfs diff=lfs merge=lfs -text
194
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_overpass.png filter=lfs diff=lfs merge=lfs -text
195
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_street_light.png filter=lfs diff=lfs merge=lfs -text
196
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/crops/diversify_input_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
197
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/main_image.png filter=lfs diff=lfs merge=lfs -text
198
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_metal_railing.png filter=lfs diff=lfs merge=lfs -text
199
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_overpass.png filter=lfs diff=lfs merge=lfs -text
200
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_pedestrian_in_suit.png filter=lfs diff=lfs merge=lfs -text
201
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000009/references/ref_yellow_lane_line.png filter=lfs diff=lfs merge=lfs -text
202
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/bbox_overlay.png filter=lfs diff=lfs merge=lfs -text
203
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_black_suv.png filter=lfs diff=lfs merge=lfs -text
204
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
205
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
206
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
207
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/detect_refine_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
208
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_black_suv.png filter=lfs diff=lfs merge=lfs -text
209
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
210
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_man_in_pink_shirt.png filter=lfs diff=lfs merge=lfs -text
211
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
212
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
213
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
214
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/crops/diversify_input_pedestrian_with_backpack.png filter=lfs diff=lfs merge=lfs -text
215
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/main_image.png filter=lfs diff=lfs merge=lfs -text
216
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_black_suv.png filter=lfs diff=lfs merge=lfs -text
217
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_man_in_grey_sweater.png filter=lfs diff=lfs merge=lfs -text
218
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_man_in_pink_shirt.png filter=lfs diff=lfs merge=lfs -text
219
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_light_blue.png filter=lfs diff=lfs merge=lfs -text
220
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_light_jacket.png filter=lfs diff=lfs merge=lfs -text
221
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_red.png filter=lfs diff=lfs merge=lfs -text
222
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_in_striped_shirt.png filter=lfs diff=lfs merge=lfs -text
223
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_walking_away.png filter=lfs diff=lfs merge=lfs -text
224
+ samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/sample_000010/references/ref_pedestrian_with_backpack.png filter=lfs diff=lfs merge=lfs -text
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # samples_v8
2
+
3
+ Generated with `data_recipe_v8.md`: vocabulary-first planning, adaptive canvas selection, structured JSON compose prompts, no identity verification, no gate, SAM white-background reference postprocessing, and strict reference-completeness verification with regenerate-until-pass behavior.
4
+
5
+ - chat model: `gcp/google/gemini-3.1-pro-preview`
6
+ - image model: `gcp/google/gemini-3-pro-image-preview`
7
+ - people references: `white_bg_full_body_front`
8
+ - non-person references: `white_bg_encyclopedia_photo`
9
+ - SAM postprocess: every generated reference is segmented with `sam_vit_b` and pasted onto pure `#ffffff` background
10
+ - reference verify max attempts per subject: `10`
11
+ - allowed canvases: `[{"aspect_ratio": "1:1", "size": [1024, 1024], "style": "photorealistic"}, {"aspect_ratio": "4:3", "size": [1152, 864], "style": "photorealistic"}, {"aspect_ratio": "3:4", "size": [864, 1152], "style": "photorealistic"}, {"aspect_ratio": "3:2", "size": [1248, 832], "style": "photorealistic"}, {"aspect_ratio": "2:3", "size": [832, 1248], "style": "photorealistic"}, {"aspect_ratio": "16:9", "size": [1280, 720], "style": "photorealistic"}, {"aspect_ratio": "9:16", "size": [720, 1280], "style": "photorealistic"}]`
12
+ - scenario mode: `driving`
13
+ - pools: `vocab_task_pool`, `plan_pool`, `scene_pool`, `detection_pool`, `reference_pool`
14
+ - bbox overlay: `bbox_overlay.png` draws every planned subject bbox; a sample is rejected and regenerated if any planned subject is still missing after VLM detection retries
15
+ - detection max attempts per subject: `3`
16
+ - launch args: `{"compose_workers": 3, "detect_max_attempts": 3, "detect_workers": 3, "emit_workers": 4, "idle_sleep": 1.0, "image_inflight": 32, "image_interval": 0.05, "image_max_retries": 8, "max_retries": 3, "no_topup": false, "plan_workers": 6, "ref_verify_max_attempts": 10, "reference_workers": 6, "requeue_in_progress": true, "seed": 1781927993, "status_interval": 30.0, "subject_detect_workers": 24, "target_samples": 10}`
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.json ADDED
The diff for this file is too large to render. See raw diff
 
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000001.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000001",
3
+ "plan_path": "sample_000001/plan.json",
4
+ "task_path": "sample_000001/vocab_task.json",
5
+ "main_image": "sample_000001/main_image.png",
6
+ "detections": "sample_000001/detections.json",
7
+ "n_detected": 3,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000001",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000002.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000002",
3
+ "plan_path": "sample_000002/plan.json",
4
+ "task_path": "sample_000002/vocab_task.json",
5
+ "main_image": "sample_000002/main_image.png",
6
+ "detections": "sample_000002/detections.json",
7
+ "n_detected": 15,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000002",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000003.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000003",
3
+ "plan_path": "sample_000003/plan.json",
4
+ "task_path": "sample_000003/vocab_task.json",
5
+ "main_image": "sample_000003/main_image.png",
6
+ "detections": "sample_000003/detections.json",
7
+ "n_detected": 3,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000003",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000004.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000004",
3
+ "plan_path": "sample_000004/plan.json",
4
+ "task_path": "sample_000004/vocab_task.json",
5
+ "main_image": "sample_000004/main_image.png",
6
+ "detections": "sample_000004/detections.json",
7
+ "n_detected": 5,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000004",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000005.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000005",
3
+ "plan_path": "sample_000005/plan.json",
4
+ "task_path": "sample_000005/vocab_task.json",
5
+ "main_image": "sample_000005/main_image.png",
6
+ "detections": "sample_000005/detections.json",
7
+ "n_detected": 6,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000005",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000006.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000006",
3
+ "plan_path": "sample_000006/plan.json",
4
+ "task_path": "sample_000006/vocab_task.json",
5
+ "main_image": "sample_000006/main_image.png",
6
+ "detections": "sample_000006/detections.json",
7
+ "n_detected": 8,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000006",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000007.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000007",
3
+ "plan_path": "sample_000007/plan.json",
4
+ "task_path": "sample_000007/vocab_task.json",
5
+ "main_image": "sample_000007/main_image.png",
6
+ "detections": "sample_000007/detections.json",
7
+ "n_detected": 14,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000007",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000008.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000008",
3
+ "plan_path": "sample_000008/plan.json",
4
+ "task_path": "sample_000008/vocab_task.json",
5
+ "main_image": "sample_000008/main_image.png",
6
+ "detections": "sample_000008/detections.json",
7
+ "n_detected": 10,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000008",
13
+ "pool": "detection_pool",
14
+ "retry_count": 1,
15
+ "errors": [
16
+ {
17
+ "time": 1782293479.3988369,
18
+ "error": "RuntimeError: reference generation or verification failed for street_lamp: RuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)",
19
+ "traceback": "Traceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 866, in generate_references\n references.append(helpers.diversify_subject(get_client(), sdir, main_image, subject, detection))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/v8_helpers.py\", line 693, in diversify_subject\n raise RuntimeError(\nRuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1020, in worker_loop\n handler(manifest)\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 1113, in handler\n references, reference_errors = generate_references(sample_id, plan, detections)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/samples_v8/generate_samples_v8.py\", line 871, in generate_references\n raise RuntimeError(f\"reference generation or verification failed for {name}: {errors[name]}\") from exc\nRuntimeError: reference generation or verification failed for street_lamp: RuntimeError: reference verification failed for street_lamp after 10 attempts: heavily truncated; missing main identifying part (lamp fixture)\n"
20
+ }
21
+ ]
22
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000009.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000009",
3
+ "plan_path": "sample_000009/plan.json",
4
+ "task_path": "sample_000009/vocab_task.json",
5
+ "main_image": "sample_000009/main_image.png",
6
+ "detections": "sample_000009/detections.json",
7
+ "n_detected": 5,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000009",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/detection_pool/done/sample_000010.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000010",
3
+ "plan_path": "sample_000010/plan.json",
4
+ "task_path": "sample_000010/vocab_task.json",
5
+ "main_image": "sample_000010/main_image.png",
6
+ "detections": "sample_000010/detections.json",
7
+ "n_detected": 9,
8
+ "model_ids": {
9
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
10
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
11
+ },
12
+ "item_id": "sample_000010",
13
+ "pool": "detection_pool",
14
+ "retry_count": 0,
15
+ "errors": []
16
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000001.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000001",
3
+ "row": "sample_000001/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000002.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000002",
3
+ "row": "sample_000002/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000003.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000003",
3
+ "row": "sample_000003/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000004.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000004",
3
+ "row": "sample_000004/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000005.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000005",
3
+ "row": "sample_000005/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000006.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000006",
3
+ "row": "sample_000006/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000007.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000007",
3
+ "row": "sample_000007/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000008.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000008",
3
+ "row": "sample_000008/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000009.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000009",
3
+ "row": "sample_000009/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/emit_pool/done/sample_000010.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000010",
3
+ "row": "sample_000010/row.json"
4
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000001.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000001",
3
+ "plan_path": "sample_000001/plan.json",
4
+ "task_path": "sample_000001/vocab_task.json",
5
+ "prompt_hash": "f859db774945f5b6373454badaca82233955ab8ca8b947dbecdab4a6975bb976",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000001",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000002.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000002",
3
+ "plan_path": "sample_000002/plan.json",
4
+ "task_path": "sample_000002/vocab_task.json",
5
+ "prompt_hash": "0e4f0ac71145112a7a163e8d8798062138922bb63e746f904a784c8608c2d011",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000002",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000003.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000003",
3
+ "plan_path": "sample_000003/plan.json",
4
+ "task_path": "sample_000003/vocab_task.json",
5
+ "prompt_hash": "325b286ae7c454ccf5a9081711812c3439116af9f5e2b39f2029ac6c415ac47e",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000003",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000004.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000004",
3
+ "plan_path": "sample_000004/plan.json",
4
+ "task_path": "sample_000004/vocab_task.json",
5
+ "prompt_hash": "9d98080d776626e886894383e176082ba23f7c536f920f60fd7462fb1e21e246",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000004",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000005.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000005",
3
+ "plan_path": "sample_000005/plan.json",
4
+ "task_path": "sample_000005/vocab_task.json",
5
+ "prompt_hash": "dd17f99d7265de9375dbd703b97e688634f8632997dd81e3c11f787d9cb95361",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000005",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000006.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000006",
3
+ "plan_path": "sample_000006/plan.json",
4
+ "task_path": "sample_000006/vocab_task.json",
5
+ "prompt_hash": "98e81e2eedabb51a9bd1b9a35847125d84c5ef899ddbf672c226fe62b69506b1",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000006",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000007.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000007",
3
+ "plan_path": "sample_000007/plan.json",
4
+ "task_path": "sample_000007/vocab_task.json",
5
+ "prompt_hash": "a11d77c30b9e5c0a4d946297ad0615803424cd56063dca0351a861c7c079059b",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000007",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000008.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000008",
3
+ "plan_path": "sample_000008/plan.json",
4
+ "task_path": "sample_000008/vocab_task.json",
5
+ "prompt_hash": "fd36686545adb806e34c1b62a7004d06a56e505d47d056ee5b0b89752f3d75bd",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000008",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000009.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000009",
3
+ "plan_path": "sample_000009/plan.json",
4
+ "task_path": "sample_000009/vocab_task.json",
5
+ "prompt_hash": "bddb7951d32d26de33b4ea1a6aa00cdbd7da12d9349508f23b30d7e3e1eadef8",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000009",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/plan_pool/done/sample_000010.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000010",
3
+ "plan_path": "sample_000010/plan.json",
4
+ "task_path": "sample_000010/vocab_task.json",
5
+ "prompt_hash": "cc6385c75e6510ab98831d1b4008bb6fc0bdaf521d9d020a82a36d3a08b9d8d8",
6
+ "model_ids": {
7
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
8
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
9
+ },
10
+ "item_id": "sample_000010",
11
+ "pool": "plan_pool",
12
+ "retry_count": 0,
13
+ "errors": []
14
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000001.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000001",
3
+ "plan_path": "sample_000001/plan.json",
4
+ "task_path": "sample_000001/vocab_task.json",
5
+ "main_image": "sample_000001/main_image.png",
6
+ "detections": "sample_000001/detections.json",
7
+ "references": "sample_000001/references.json",
8
+ "n_references": 3,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000001",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000002.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000002",
3
+ "plan_path": "sample_000002/plan.json",
4
+ "task_path": "sample_000002/vocab_task.json",
5
+ "main_image": "sample_000002/main_image.png",
6
+ "detections": "sample_000002/detections.json",
7
+ "references": "sample_000002/references.json",
8
+ "n_references": 15,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000002",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000003.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000003",
3
+ "plan_path": "sample_000003/plan.json",
4
+ "task_path": "sample_000003/vocab_task.json",
5
+ "main_image": "sample_000003/main_image.png",
6
+ "detections": "sample_000003/detections.json",
7
+ "references": "sample_000003/references.json",
8
+ "n_references": 3,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000003",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000004.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000004",
3
+ "plan_path": "sample_000004/plan.json",
4
+ "task_path": "sample_000004/vocab_task.json",
5
+ "main_image": "sample_000004/main_image.png",
6
+ "detections": "sample_000004/detections.json",
7
+ "references": "sample_000004/references.json",
8
+ "n_references": 5,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000004",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000005.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000005",
3
+ "plan_path": "sample_000005/plan.json",
4
+ "task_path": "sample_000005/vocab_task.json",
5
+ "main_image": "sample_000005/main_image.png",
6
+ "detections": "sample_000005/detections.json",
7
+ "references": "sample_000005/references.json",
8
+ "n_references": 6,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000005",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000006.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000006",
3
+ "plan_path": "sample_000006/plan.json",
4
+ "task_path": "sample_000006/vocab_task.json",
5
+ "main_image": "sample_000006/main_image.png",
6
+ "detections": "sample_000006/detections.json",
7
+ "references": "sample_000006/references.json",
8
+ "n_references": 8,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000006",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000007.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000007",
3
+ "plan_path": "sample_000007/plan.json",
4
+ "task_path": "sample_000007/vocab_task.json",
5
+ "main_image": "sample_000007/main_image.png",
6
+ "detections": "sample_000007/detections.json",
7
+ "references": "sample_000007/references.json",
8
+ "n_references": 14,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000007",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000008.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000008",
3
+ "plan_path": "sample_000008/plan.json",
4
+ "task_path": "sample_000008/vocab_task.json",
5
+ "main_image": "sample_000008/main_image.png",
6
+ "detections": "sample_000008/detections.json",
7
+ "references": "sample_000008/references.json",
8
+ "n_references": 10,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000008",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000009.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000009",
3
+ "plan_path": "sample_000009/plan.json",
4
+ "task_path": "sample_000009/vocab_task.json",
5
+ "main_image": "sample_000009/main_image.png",
6
+ "detections": "sample_000009/detections.json",
7
+ "references": "sample_000009/references.json",
8
+ "n_references": 5,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000009",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/reference_pool/done/sample_000010.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000010",
3
+ "plan_path": "sample_000010/plan.json",
4
+ "task_path": "sample_000010/vocab_task.json",
5
+ "main_image": "sample_000010/main_image.png",
6
+ "detections": "sample_000010/detections.json",
7
+ "references": "sample_000010/references.json",
8
+ "n_references": 9,
9
+ "reference_errors": {},
10
+ "model_ids": {
11
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
12
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
13
+ },
14
+ "item_id": "sample_000010",
15
+ "pool": "reference_pool",
16
+ "retry_count": 0,
17
+ "errors": []
18
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000001.json ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000001",
3
+ "target_total": 3,
4
+ "target_people": 1,
5
+ "target_objects": 2,
6
+ "canvas_size": [
7
+ 1248,
8
+ 832
9
+ ],
10
+ "canvas_aspect_ratio": "3:2",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 3,
17
+ "n_detected": 3,
18
+ "n_subjects": 3,
19
+ "subjects": [
20
+ {
21
+ "name": "pedestrian",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "CrowdHuman:data/data_69/273278,12fc4700013112375.jpg:person:3",
26
+ "source_name": "pedestrian",
27
+ "source_description": "A person wearing a dark coat and trousers. Source dataset: CrowdHuman. Scene context: A bustling city street lined with trees showcasing vibrant yellow autumn foliage, with many pedestrians walking in both directions.",
28
+ "sub_caption": "pedestrian: A person wearing a dark coat and trousers, walking confidently.. Scene role: Walking along the curbside near the barrier.",
29
+ "measured_bbox": [
30
+ 0.1528,
31
+ 0.301,
32
+ 0.2511,
33
+ 0.7071
34
+ ],
35
+ "detection_confidence": 0.98,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_pedestrian.png",
38
+ "raw_ref_image": "references/raw_ref_pedestrian_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_pedestrian.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_pedestrian_attempt_01.png",
44
+ "output": "references/ref_pedestrian.png",
45
+ "mask": "references/sam_mask_pedestrian.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 330.0,
57
+ 42.0,
58
+ 698.0,
59
+ 1007.0
60
+ ],
61
+ "mask_score": 3.413244,
62
+ "mask_area_ratio": 0.159381,
63
+ "elapsed_seconds": 33.2771
64
+ }
65
+ },
66
+ {
67
+ "name": "parked_dark_car",
68
+ "is_person": false,
69
+ "subject_type": "object",
70
+ "source_set": "obj_set",
71
+ "source_image_id": "BDD100K:bcb356f6-520dd65c:object:9",
72
+ "source_name": "parked dark car",
73
+ "source_description": "A dark-colored sedan parked on the right side of the street, behind the silver car. Source dataset: BDD100K. Scene context: A large white New York City bus is driving down a city street on an overcast day with other cars parked and driving around it.",
74
+ "sub_caption": "parked dark car: A dark-colored sedan.. Scene role: Parked on the street near the curb in the background right.",
75
+ "measured_bbox": [
76
+ 0.5163,
77
+ 0.3897,
78
+ 0.9968,
79
+ 0.9244
80
+ ],
81
+ "detection_confidence": 0.98,
82
+ "ref_style": "white_bg_encyclopedia_photo",
83
+ "ref_image": "references/ref_parked_dark_car.png",
84
+ "raw_ref_image": "references/raw_ref_parked_dark_car_attempt_01.png",
85
+ "reference_verify": "references/reference_verify_parked_dark_car.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 1,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_parked_dark_car_attempt_01.png",
90
+ "output": "references/ref_parked_dark_car.png",
91
+ "mask": "references/sam_mask_parked_dark_car.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 47.0,
103
+ 315.0,
104
+ 976.0,
105
+ 694.0
106
+ ],
107
+ "mask_score": 3.4345,
108
+ "mask_area_ratio": 0.180014,
109
+ "elapsed_seconds": 7.1991
110
+ }
111
+ },
112
+ {
113
+ "name": "metal_barrier",
114
+ "is_person": false,
115
+ "subject_type": "object",
116
+ "source_set": "obj_set",
117
+ "source_image_id": "CrowdHuman:data/data_74/284193,1da20000b642be5b.jpg:object:5",
118
+ "source_name": "metal barrier",
119
+ "source_description": "silver metal barricade placed near the entrance Source dataset: CrowdHuman. Scene context: People are gathered outside the entrance of a stone building with arched doorways and large windows.",
120
+ "sub_caption": "metal barrier: A silver metal barricade placed along the street.. Scene role: Positioned along the curb to section off the pedestrian area from the road.",
121
+ "measured_bbox": [
122
+ 0.3454,
123
+ 0.4302,
124
+ 0.5465,
125
+ 0.8402
126
+ ],
127
+ "detection_confidence": 0.98,
128
+ "ref_style": "white_bg_encyclopedia_photo",
129
+ "ref_image": "references/ref_metal_barrier.png",
130
+ "raw_ref_image": "references/raw_ref_metal_barrier_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_metal_barrier.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000001/references/raw_ref_metal_barrier_attempt_01.png",
136
+ "output": "references/ref_metal_barrier.png",
137
+ "mask": "references/sam_mask_metal_barrier.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 2.0,
149
+ 107.0,
150
+ 1009.0,
151
+ 986.0
152
+ ],
153
+ "mask_score": 1.555076,
154
+ "mask_area_ratio": 0.845579,
155
+ "elapsed_seconds": 7.2854
156
+ }
157
+ }
158
+ ],
159
+ "not_emitted": [],
160
+ "model_ids": {
161
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
162
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
163
+ }
164
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000002.json ADDED
@@ -0,0 +1,716 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000002",
3
+ "target_total": 15,
4
+ "target_people": 2,
5
+ "target_objects": 13,
6
+ "canvas_size": [
7
+ 1152,
8
+ 864
9
+ ],
10
+ "canvas_aspect_ratio": "4:3",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 15,
17
+ "n_detected": 15,
18
+ "n_subjects": 15,
19
+ "subjects": [
20
+ {
21
+ "name": "pedestrian_walking",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "BDD100K:c5d864fa-b0b2380b:person:2",
26
+ "source_name": "pedestrian",
27
+ "source_description": "Person walking away from the camera on the right sidewalk, wearing dark clothing. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a wet city street lined with tall buildings, with pedestrians on the sidewalk and several cars and taxis ahead in the rain.",
28
+ "sub_caption": "pedestrian: Person walking away from the camera on the right sidewalk, wearing dark clothing.. Scene role: walking along the sidewalk on the right side of the street",
29
+ "measured_bbox": [
30
+ 0.7497,
31
+ 0.4757,
32
+ 0.7954,
33
+ 0.6192
34
+ ],
35
+ "detection_confidence": 100,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_pedestrian_walking.png",
38
+ "raw_ref_image": "references/raw_ref_pedestrian_walking_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_pedestrian_walking.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_pedestrian_walking_attempt_01.png",
44
+ "output": "references/ref_pedestrian_walking.png",
45
+ "mask": "references/sam_mask_pedestrian_walking.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 341.0,
57
+ 63.0,
58
+ 695.0,
59
+ 972.0
60
+ ],
61
+ "mask_score": 3.459152,
62
+ "mask_area_ratio": 0.145545,
63
+ "elapsed_seconds": 8.3331
64
+ }
65
+ },
66
+ {
67
+ "name": "shop_pedestrian",
68
+ "is_person": true,
69
+ "subject_type": "person",
70
+ "source_set": "people_set",
71
+ "source_image_id": "BDD100K:b714a088-861a043b:person:2",
72
+ "source_name": "pedestrian",
73
+ "source_description": "another person near the shop entrance on the left, partially obscured Source dataset: BDD100K. Scene context: a city street scene from the perspective of a vehicle, showing multiple cars in motion, buildings lining the road, and pedestrians on the sidewalks during dusk or early evening",
74
+ "sub_caption": "pedestrian: Person standing near a shop entrance on the right, partially obscured.. Scene role: standing on the sidewalk near the storefronts on the right",
75
+ "measured_bbox": [
76
+ 0.9337,
77
+ 0.4752,
78
+ 0.9695,
79
+ 0.6107
80
+ ],
81
+ "detection_confidence": 0.98,
82
+ "ref_style": "white_bg_full_body_front",
83
+ "ref_image": "references/ref_shop_pedestrian.png",
84
+ "raw_ref_image": "references/raw_ref_shop_pedestrian_attempt_02.png",
85
+ "reference_verify": "references/reference_verify_shop_pedestrian.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 2,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_shop_pedestrian_attempt_02.png",
90
+ "output": "references/ref_shop_pedestrian.png",
91
+ "mask": "references/sam_mask_shop_pedestrian.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 312.0,
103
+ 43.0,
104
+ 719.0,
105
+ 1020.0
106
+ ],
107
+ "mask_score": 3.162079,
108
+ "mask_area_ratio": 0.167512,
109
+ "elapsed_seconds": 7.2283
110
+ }
111
+ },
112
+ {
113
+ "name": "city_buildings",
114
+ "is_person": false,
115
+ "subject_type": "object",
116
+ "source_set": "obj_set",
117
+ "source_image_id": "CrowdHuman:data/data_9/283554,31eeb000e9237b31.jpg:object:9",
118
+ "source_name": "building",
119
+ "source_description": "Various city buildings of different heights forming the skyline in the background. Source dataset: CrowdHuman. Scene context: People are walking and resting on the wooden walkway of a large suspension bridge with a city skyline in the background.",
120
+ "sub_caption": "building: Various city buildings of different heights forming the skyline and lining the street.. Scene role: framing the street and forming the background skyline",
121
+ "measured_bbox": [
122
+ 0.0,
123
+ 0.0,
124
+ 1.0,
125
+ 0.6084
126
+ ],
127
+ "detection_confidence": 0.9,
128
+ "ref_style": "white_bg_encyclopedia_photo",
129
+ "ref_image": "references/ref_city_buildings.png",
130
+ "raw_ref_image": "references/raw_ref_city_buildings_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_city_buildings.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_city_buildings_attempt_01.png",
136
+ "output": "references/ref_city_buildings.png",
137
+ "mask": "references/sam_mask_city_buildings.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 14.0,
149
+ 171.0,
150
+ 1009.0,
151
+ 883.0
152
+ ],
153
+ "mask_score": 3.176814,
154
+ "mask_area_ratio": 0.327415,
155
+ "elapsed_seconds": 7.146
156
+ }
157
+ },
158
+ {
159
+ "name": "street_signs",
160
+ "is_person": false,
161
+ "subject_type": "object",
162
+ "source_set": "obj_set",
163
+ "source_image_id": "BDD100K:b5ab0e46-8eab4733:object:8",
164
+ "source_name": "street signs",
165
+ "source_description": "Various street signs attached to a pole on the right side of the street, including a speed limit sign. Source dataset: BDD100K. Scene context: A narrow city street with several parked and moving box trucks and vans, bordered by multi-story buildings and construction barriers.",
166
+ "sub_caption": "street signs: Various blank street signs attached to a pole on the right side of the street.. Scene role: mounted on a pole next to the right sidewalk",
167
+ "measured_bbox": [
168
+ 0.641,
169
+ 0.165,
170
+ 0.744,
171
+ 0.408
172
+ ],
173
+ "detection_confidence": 0.9,
174
+ "ref_style": "white_bg_encyclopedia_photo",
175
+ "ref_image": "references/ref_street_signs.png",
176
+ "raw_ref_image": "references/raw_ref_street_signs_attempt_01.png",
177
+ "reference_verify": "references/reference_verify_street_signs.json",
178
+ "reference_verify_passed": true,
179
+ "reference_attempts": 1,
180
+ "sam_white_bg": {
181
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_signs_attempt_01.png",
182
+ "output": "references/ref_street_signs.png",
183
+ "mask": "references/sam_mask_street_signs.png",
184
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
185
+ "sam_model_type": "vit_b",
186
+ "sam_device": "auto",
187
+ "sam_working_size": [
188
+ 640,
189
+ 640
190
+ ],
191
+ "sam_max_side": 640,
192
+ "sam_downscale": 0.625,
193
+ "prompt_box_xyxy": [
194
+ 224.0,
195
+ 0.0,
196
+ 744.0,
197
+ 1023.0
198
+ ],
199
+ "mask_score": 3.332549,
200
+ "mask_area_ratio": 0.190769,
201
+ "elapsed_seconds": 7.1886
202
+ }
203
+ },
204
+ {
205
+ "name": "storefront_sign",
206
+ "is_person": false,
207
+ "subject_type": "object",
208
+ "source_set": "obj_set",
209
+ "source_image_id": "CrowdHuman:data/data_4/273275,46a6f0005d04fc24.jpg:object:7",
210
+ "source_name": "storefront sign",
211
+ "source_description": "A dark sign with white text 'DELI' and a smaller red 'ATM' sign beneath it on the left. Source dataset: CrowdHuman. Scene context: A densely crowded city street with numerous pedestrians walking towards the camera, while emergency vehicles with flashing lights are visible in the background traffic.",
212
+ "sub_caption": "storefront sign: A dark hanging sign framework attached to a building on the right, devoid of readable text.. Scene role: hanging above the shop entrance on the right side of the road",
213
+ "measured_bbox": [
214
+ 0.7854,
215
+ 0.1934,
216
+ 0.9082,
217
+ 0.2906
218
+ ],
219
+ "detection_confidence": 0.99,
220
+ "ref_style": "white_bg_encyclopedia_photo",
221
+ "ref_image": "references/ref_storefront_sign.png",
222
+ "raw_ref_image": "references/raw_ref_storefront_sign_attempt_01.png",
223
+ "reference_verify": "references/reference_verify_storefront_sign.json",
224
+ "reference_verify_passed": true,
225
+ "reference_attempts": 1,
226
+ "sam_white_bg": {
227
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_storefront_sign_attempt_01.png",
228
+ "output": "references/ref_storefront_sign.png",
229
+ "mask": "references/sam_mask_storefront_sign.png",
230
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
231
+ "sam_model_type": "vit_b",
232
+ "sam_device": "auto",
233
+ "sam_working_size": [
234
+ 640,
235
+ 640
236
+ ],
237
+ "sam_max_side": 640,
238
+ "sam_downscale": 0.625,
239
+ "prompt_box_xyxy": [
240
+ 46.0,
241
+ 0.0,
242
+ 1023.0,
243
+ 811.0
244
+ ],
245
+ "mask_score": 3.296373,
246
+ "mask_area_ratio": 0.447847,
247
+ "elapsed_seconds": 7.3102
248
+ }
249
+ },
250
+ {
251
+ "name": "parked_suv_right",
252
+ "is_person": false,
253
+ "subject_type": "object",
254
+ "source_set": "obj_set",
255
+ "source_image_id": "BDD100K:c54441e6-400c221e:object:4",
256
+ "source_name": "parked SUV",
257
+ "source_description": "Dark-colored SUV parked ahead of the sedan on the right side of the road. Source dataset: BDD100K. Scene context: Nighttime driving scene approaching an intersection with green traffic lights and parked cars on the right.",
258
+ "sub_caption": "parked SUV: Dark-colored SUV parked on the right side of the road.. Scene role: parked alongside the right curb",
259
+ "measured_bbox": [
260
+ 0.5507,
261
+ 0.4879,
262
+ 0.6783,
263
+ 0.6234
264
+ ],
265
+ "detection_confidence": 0.95,
266
+ "ref_style": "white_bg_encyclopedia_photo",
267
+ "ref_image": "references/ref_parked_suv_right.png",
268
+ "raw_ref_image": "references/raw_ref_parked_suv_right_attempt_01.png",
269
+ "reference_verify": "references/reference_verify_parked_suv_right.json",
270
+ "reference_verify_passed": true,
271
+ "reference_attempts": 1,
272
+ "sam_white_bg": {
273
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_parked_suv_right_attempt_01.png",
274
+ "output": "references/ref_parked_suv_right.png",
275
+ "mask": "references/sam_mask_parked_suv_right.png",
276
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
277
+ "sam_model_type": "vit_b",
278
+ "sam_device": "auto",
279
+ "sam_working_size": [
280
+ 640,
281
+ 640
282
+ ],
283
+ "sam_max_side": 640,
284
+ "sam_downscale": 0.625,
285
+ "prompt_box_xyxy": [
286
+ 156.0,
287
+ 150.0,
288
+ 868.0,
289
+ 812.0
290
+ ],
291
+ "mask_score": 3.463227,
292
+ "mask_area_ratio": 0.291222,
293
+ "elapsed_seconds": 7.2583
294
+ }
295
+ },
296
+ {
297
+ "name": "parked_car_left",
298
+ "is_person": false,
299
+ "subject_type": "object",
300
+ "source_set": "obj_set",
301
+ "source_image_id": "BDD100K:bb2e43e4-5e7a7129:object:6",
302
+ "source_name": "car",
303
+ "source_description": "A dark car parked along the left curb further ahead. Source dataset: BDD100K. Scene context: A dashcam view from a vehicle driving down a multi-lane road on a partly cloudy day, with a few other cars and pedestrians on the sidewalk.",
304
+ "sub_caption": "car: A dark car parked along the left curb further ahead.. Scene role: parked alongside the left curb",
305
+ "measured_bbox": [
306
+ 0.0,
307
+ 0.5102,
308
+ 0.1259,
309
+ 0.5998
310
+ ],
311
+ "detection_confidence": 0.9,
312
+ "ref_style": "white_bg_encyclopedia_photo",
313
+ "ref_image": "references/ref_parked_car_left.png",
314
+ "raw_ref_image": "references/raw_ref_parked_car_left_attempt_01.png",
315
+ "reference_verify": "references/reference_verify_parked_car_left.json",
316
+ "reference_verify_passed": true,
317
+ "reference_attempts": 1,
318
+ "sam_white_bg": {
319
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_parked_car_left_attempt_01.png",
320
+ "output": "references/ref_parked_car_left.png",
321
+ "mask": "references/sam_mask_parked_car_left.png",
322
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
323
+ "sam_model_type": "vit_b",
324
+ "sam_device": "auto",
325
+ "sam_working_size": [
326
+ 640,
327
+ 640
328
+ ],
329
+ "sam_max_side": 640,
330
+ "sam_downscale": 0.625,
331
+ "prompt_box_xyxy": [
332
+ 0.0,
333
+ 319.0,
334
+ 1023.0,
335
+ 695.0
336
+ ],
337
+ "mask_score": 3.122119,
338
+ "mask_area_ratio": 0.19451,
339
+ "elapsed_seconds": 8.5738
340
+ }
341
+ },
342
+ {
343
+ "name": "traveling_dark_suv",
344
+ "is_person": false,
345
+ "subject_type": "object",
346
+ "source_set": "obj_set",
347
+ "source_image_id": "BDD100K:c889c950-865ca5b6:object:0",
348
+ "source_name": "dark SUV",
349
+ "source_description": "A dark SUV traveling in the left lane, with visible red taillights. Source dataset: BDD100K. Scene context: Nighttime driving on a highway with several cars visible ahead, illuminated mainly by taillights and headlights.",
350
+ "sub_caption": "dark SUV: A dark SUV traveling in the left lane, with visible red taillights reflecting the twilight.. Scene role: driving in the adjacent lane",
351
+ "measured_bbox": [
352
+ 0.2594,
353
+ 0.4853,
354
+ 0.417,
355
+ 0.6419
356
+ ],
357
+ "detection_confidence": 0.99,
358
+ "ref_style": "white_bg_encyclopedia_photo",
359
+ "ref_image": "references/ref_traveling_dark_suv.png",
360
+ "raw_ref_image": "references/raw_ref_traveling_dark_suv_attempt_01.png",
361
+ "reference_verify": "references/reference_verify_traveling_dark_suv.json",
362
+ "reference_verify_passed": true,
363
+ "reference_attempts": 1,
364
+ "sam_white_bg": {
365
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_traveling_dark_suv_attempt_01.png",
366
+ "output": "references/ref_traveling_dark_suv.png",
367
+ "mask": "references/sam_mask_traveling_dark_suv.png",
368
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
369
+ "sam_model_type": "vit_b",
370
+ "sam_device": "auto",
371
+ "sam_working_size": [
372
+ 640,
373
+ 640
374
+ ],
375
+ "sam_max_side": 640,
376
+ "sam_downscale": 0.625,
377
+ "prompt_box_xyxy": [
378
+ 119.0,
379
+ 198.0,
380
+ 910.0,
381
+ 810.0
382
+ ],
383
+ "mask_score": 3.470329,
384
+ "mask_area_ratio": 0.300606,
385
+ "elapsed_seconds": 8.5072
386
+ }
387
+ },
388
+ {
389
+ "name": "street_light",
390
+ "is_person": false,
391
+ "subject_type": "object",
392
+ "source_set": "obj_set",
393
+ "source_image_id": "BDD100K:c84f848e-2a5e0737:object:2",
394
+ "source_name": "street light",
395
+ "source_description": "Tall pole with a bright light on top, illuminating the road from the right side. Source dataset: BDD100K. Scene context: Nighttime driving view on a multi-lane highway with streetlights illuminating the dark road ahead.",
396
+ "sub_caption": "street light: Tall pole with a bright light on top, illuminating the road from the right side.. Scene role: providing illumination from the right sidewalk",
397
+ "measured_bbox": [
398
+ 0.5577,
399
+ 0.0219,
400
+ 0.6964,
401
+ 0.588
402
+ ],
403
+ "detection_confidence": 1.0,
404
+ "ref_style": "white_bg_encyclopedia_photo",
405
+ "ref_image": "references/ref_street_light.png",
406
+ "raw_ref_image": "references/raw_ref_street_light_attempt_01.png",
407
+ "reference_verify": "references/reference_verify_street_light.json",
408
+ "reference_verify_passed": true,
409
+ "reference_attempts": 1,
410
+ "sam_white_bg": {
411
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_light_attempt_01.png",
412
+ "output": "references/ref_street_light.png",
413
+ "mask": "references/sam_mask_street_light.png",
414
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
415
+ "sam_model_type": "vit_b",
416
+ "sam_device": "auto",
417
+ "sam_working_size": [
418
+ 640,
419
+ 640
420
+ ],
421
+ "sam_max_side": 640,
422
+ "sam_downscale": 0.625,
423
+ "prompt_box_xyxy": [
424
+ 330.0,
425
+ 17.0,
426
+ 688.0,
427
+ 996.0
428
+ ],
429
+ "mask_score": 3.395182,
430
+ "mask_area_ratio": 0.033435,
431
+ "elapsed_seconds": 7.0701
432
+ }
433
+ },
434
+ {
435
+ "name": "vehicle_dashboard",
436
+ "is_person": false,
437
+ "subject_type": "object",
438
+ "source_set": "obj_set",
439
+ "source_image_id": "BDD100K:b5032e1d-dad95b60:object:9",
440
+ "source_name": "dashboard",
441
+ "source_description": "The dark, reflective dashboard and lower windshield area of the camera vehicle in the foreground. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a residential city street lined with trees, parked cars, and multi-story brick buildings.",
442
+ "sub_caption": "dashboard: The dark, reflective dashboard and lower windshield area of the camera vehicle in the foreground.. Scene role: anchoring the bottom of the frame to establish a driver's perspective",
443
+ "measured_bbox": [
444
+ 0.0,
445
+ 0.8881,
446
+ 1.0,
447
+ 1.0
448
+ ],
449
+ "detection_confidence": 0.9,
450
+ "ref_style": "white_bg_encyclopedia_photo",
451
+ "ref_image": "references/ref_vehicle_dashboard.png",
452
+ "raw_ref_image": "references/raw_ref_vehicle_dashboard_attempt_01.png",
453
+ "reference_verify": "references/reference_verify_vehicle_dashboard.json",
454
+ "reference_verify_passed": true,
455
+ "reference_attempts": 1,
456
+ "sam_white_bg": {
457
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_vehicle_dashboard_attempt_01.png",
458
+ "output": "references/ref_vehicle_dashboard.png",
459
+ "mask": "references/sam_mask_vehicle_dashboard.png",
460
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
461
+ "sam_model_type": "vit_b",
462
+ "sam_device": "auto",
463
+ "sam_working_size": [
464
+ 640,
465
+ 640
466
+ ],
467
+ "sam_max_side": 640,
468
+ "sam_downscale": 0.625,
469
+ "prompt_box_xyxy": [
470
+ 0.0,
471
+ 223.0,
472
+ 1023.0,
473
+ 700.0
474
+ ],
475
+ "mask_score": 2.938032,
476
+ "mask_area_ratio": 0.282133,
477
+ "elapsed_seconds": 7.1679
478
+ }
479
+ },
480
+ {
481
+ "name": "drainage_grate",
482
+ "is_person": false,
483
+ "subject_type": "object",
484
+ "source_set": "obj_set",
485
+ "source_image_id": "BDD100K:b4d0e72d-3b208072:object:16",
486
+ "source_name": "drainage grate",
487
+ "source_description": "A metal drainage grate on the edge of the road on the right. Source dataset: BDD100K. Scene context: A driving scene on a multi-lane highway with a dark red minivan in the left lane, under a partly cloudy sky.",
488
+ "sub_caption": "drainage grate: A metal drainage grate on the edge of the road on the right.. Scene role: embedded in the road surface near the right curb",
489
+ "measured_bbox": [
490
+ 0.5682,
491
+ 0.6773,
492
+ 0.8089,
493
+ 0.73
494
+ ],
495
+ "detection_confidence": 0.99,
496
+ "ref_style": "white_bg_encyclopedia_photo",
497
+ "ref_image": "references/ref_drainage_grate.png",
498
+ "raw_ref_image": "references/raw_ref_drainage_grate_attempt_01.png",
499
+ "reference_verify": "references/reference_verify_drainage_grate.json",
500
+ "reference_verify_passed": true,
501
+ "reference_attempts": 1,
502
+ "sam_white_bg": {
503
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_drainage_grate_attempt_01.png",
504
+ "output": "references/ref_drainage_grate.png",
505
+ "mask": "references/sam_mask_drainage_grate.png",
506
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
507
+ "sam_model_type": "vit_b",
508
+ "sam_device": "auto",
509
+ "sam_working_size": [
510
+ 640,
511
+ 640
512
+ ],
513
+ "sam_max_side": 640,
514
+ "sam_downscale": 0.625,
515
+ "prompt_box_xyxy": [
516
+ 0.0,
517
+ 250.0,
518
+ 1023.0,
519
+ 773.0
520
+ ],
521
+ "mask_score": 3.366042,
522
+ "mask_area_ratio": 0.379179,
523
+ "elapsed_seconds": 8.3171
524
+ }
525
+ },
526
+ {
527
+ "name": "white_car",
528
+ "is_person": false,
529
+ "subject_type": "object",
530
+ "source_set": "obj_set",
531
+ "source_image_id": "BDD100K:b3a102ed-6ef54f5e:object:3",
532
+ "source_name": "white car",
533
+ "source_description": "A white car visible further down the road in the right lane. Source dataset: BDD100K. Scene context: Nighttime driving scene in a city with cars stopped in traffic and a construction site on the right.",
534
+ "sub_caption": "white car: A white car visible further down the road in the right lane.. Scene role: driving ahead in the same lane",
535
+ "measured_bbox": [
536
+ 0.4356,
537
+ 0.5036,
538
+ 0.4784,
539
+ 0.548
540
+ ],
541
+ "detection_confidence": "high",
542
+ "ref_style": "white_bg_encyclopedia_photo",
543
+ "ref_image": "references/ref_white_car.png",
544
+ "raw_ref_image": "references/raw_ref_white_car_attempt_01.png",
545
+ "reference_verify": "references/reference_verify_white_car.json",
546
+ "reference_verify_passed": true,
547
+ "reference_attempts": 1,
548
+ "sam_white_bg": {
549
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_white_car_attempt_01.png",
550
+ "output": "references/ref_white_car.png",
551
+ "mask": "references/sam_mask_white_car.png",
552
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
553
+ "sam_model_type": "vit_b",
554
+ "sam_device": "auto",
555
+ "sam_working_size": [
556
+ 640,
557
+ 640
558
+ ],
559
+ "sam_max_side": 640,
560
+ "sam_downscale": 0.625,
561
+ "prompt_box_xyxy": [
562
+ 137.0,
563
+ 215.0,
564
+ 884.0,
565
+ 819.0
566
+ ],
567
+ "mask_score": 3.442096,
568
+ "mask_area_ratio": 0.295652,
569
+ "elapsed_seconds": 7.1564
570
+ }
571
+ },
572
+ {
573
+ "name": "yellow_lines",
574
+ "is_person": false,
575
+ "subject_type": "object",
576
+ "source_set": "obj_set",
577
+ "source_image_id": "BDD100K:c417a291-7802692d:object:8",
578
+ "source_name": "yellow lines",
579
+ "source_description": "Double yellow painted lines separating opposite directions of traffic. Source dataset: BDD100K. Scene context: A pedestrian crossing a street at a crosswalk with several cars stopped around them, with storefronts visible in the background.",
580
+ "sub_caption": "yellow lines: Double yellow painted lines separating opposite directions of traffic.. Scene role: painted down the center of the road",
581
+ "measured_bbox": [
582
+ 0.0,
583
+ 0.622,
584
+ 0.2642,
585
+ 0.7692
586
+ ],
587
+ "detection_confidence": 0.99,
588
+ "ref_style": "white_bg_encyclopedia_photo",
589
+ "ref_image": "references/ref_yellow_lines.png",
590
+ "raw_ref_image": "references/raw_ref_yellow_lines_attempt_01.png",
591
+ "reference_verify": "references/reference_verify_yellow_lines.json",
592
+ "reference_verify_passed": true,
593
+ "reference_attempts": 1,
594
+ "sam_white_bg": {
595
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_yellow_lines_attempt_01.png",
596
+ "output": "references/ref_yellow_lines.png",
597
+ "mask": "references/sam_mask_yellow_lines.png",
598
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
599
+ "sam_model_type": "vit_b",
600
+ "sam_device": "auto",
601
+ "sam_working_size": [
602
+ 640,
603
+ 640
604
+ ],
605
+ "sam_max_side": 640,
606
+ "sam_downscale": 0.625,
607
+ "prompt_box_xyxy": [
608
+ 0.0,
609
+ 0.0,
610
+ 1023.0,
611
+ 1023.0
612
+ ],
613
+ "mask_score": 3.166027,
614
+ "mask_area_ratio": 0.242679,
615
+ "elapsed_seconds": 7.0941
616
+ }
617
+ },
618
+ {
619
+ "name": "street_trees",
620
+ "is_person": false,
621
+ "subject_type": "object",
622
+ "source_set": "obj_set",
623
+ "source_image_id": "BDD100K:c4891df0-24371ae1:object:3",
624
+ "source_name": "trees",
625
+ "source_description": "Numerous trees with dense green and yellowish foliage lining both sides of the road. Source dataset: BDD100K. Scene context: A dark SUV drives ahead on a wet road lined with trees and a fence under a cloudy sky.",
626
+ "sub_caption": "trees: Numerous trees with dense green foliage lining both sides of the road.. Scene role: growing along the sidewalks, adding greenery",
627
+ "measured_bbox": [
628
+ 0.2664,
629
+ 0.0,
630
+ 0.7141,
631
+ 0.5127
632
+ ],
633
+ "detection_confidence": 0.9,
634
+ "ref_style": "white_bg_encyclopedia_photo",
635
+ "ref_image": "references/ref_street_trees.png",
636
+ "raw_ref_image": "references/raw_ref_street_trees_attempt_01.png",
637
+ "reference_verify": "references/reference_verify_street_trees.json",
638
+ "reference_verify_passed": true,
639
+ "reference_attempts": 1,
640
+ "sam_white_bg": {
641
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_street_trees_attempt_01.png",
642
+ "output": "references/ref_street_trees.png",
643
+ "mask": "references/sam_mask_street_trees.png",
644
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
645
+ "sam_model_type": "vit_b",
646
+ "sam_device": "auto",
647
+ "sam_working_size": [
648
+ 640,
649
+ 640
650
+ ],
651
+ "sam_max_side": 640,
652
+ "sam_downscale": 0.625,
653
+ "prompt_box_xyxy": [
654
+ 35.0,
655
+ 55.0,
656
+ 1002.0,
657
+ 1000.0
658
+ ],
659
+ "mask_score": 3.226043,
660
+ "mask_area_ratio": 0.439437,
661
+ "elapsed_seconds": 7.0986
662
+ }
663
+ },
664
+ {
665
+ "name": "twilight_sky",
666
+ "is_person": false,
667
+ "subject_type": "object",
668
+ "source_set": "obj_set",
669
+ "source_image_id": "BDD100K:c13c0d1f-00dfd075:object:9",
670
+ "source_name": "sky",
671
+ "source_description": "Clear twilight sky transitioning from bright near the horizon to dark blue at the top. Source dataset: BDD100K. Scene context: View from a moving vehicle looking down a multi-lane city street lined with parked cars and large trees under a clear twilight sky.",
672
+ "sub_caption": "sky: Clear twilight sky transitioning from bright near the horizon to dark blue at the top.. Scene role: visible above the buildings and trees at the end of the road",
673
+ "measured_bbox": [
674
+ 0.188,
675
+ 0.0,
676
+ 0.862,
677
+ 0.4846
678
+ ],
679
+ "detection_confidence": 0.95,
680
+ "ref_style": "white_bg_encyclopedia_photo",
681
+ "ref_image": "references/ref_twilight_sky.png",
682
+ "raw_ref_image": "references/raw_ref_twilight_sky_attempt_01.png",
683
+ "reference_verify": "references/reference_verify_twilight_sky.json",
684
+ "reference_verify_passed": true,
685
+ "reference_attempts": 1,
686
+ "sam_white_bg": {
687
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000002/references/raw_ref_twilight_sky_attempt_01.png",
688
+ "output": "references/ref_twilight_sky.png",
689
+ "mask": "references/sam_mask_twilight_sky.png",
690
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
691
+ "sam_model_type": "vit_b",
692
+ "sam_device": "auto",
693
+ "sam_working_size": [
694
+ 640,
695
+ 640
696
+ ],
697
+ "sam_max_side": 640,
698
+ "sam_downscale": 0.625,
699
+ "prompt_box_xyxy": [
700
+ 72.0,
701
+ 72.0,
702
+ 951.0,
703
+ 951.0
704
+ ],
705
+ "mask_score": 3.471577,
706
+ "mask_area_ratio": 0.631801,
707
+ "elapsed_seconds": 7.5016
708
+ }
709
+ }
710
+ ],
711
+ "not_emitted": [],
712
+ "model_ids": {
713
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
714
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
715
+ }
716
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000003.json ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000003",
3
+ "target_total": 3,
4
+ "target_people": 1,
5
+ "target_objects": 2,
6
+ "canvas_size": [
7
+ 1280,
8
+ 720
9
+ ],
10
+ "canvas_aspect_ratio": "16:9",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 3,
17
+ "n_detected": 3,
18
+ "n_subjects": 3,
19
+ "subjects": [
20
+ {
21
+ "name": "waiting_pedestrian",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "CrowdHuman:data/data_12/282555,5c403000efcca35d.jpg:person:13",
26
+ "source_name": "shopper",
27
+ "source_description": "A person standing on the top level, wearing a dark top and dark pants. Source dataset: CrowdHuman. Scene context: A multi-level outdoor shopping mall with various people walking and shopping.",
28
+ "sub_caption": "shopper: A pedestrian wearing a dark top and dark pants, standing upright with a natural posture.. Scene role: Waiting at the curb near the crosswalk on the left side of the street.",
29
+ "measured_bbox": [
30
+ 0.0928,
31
+ 0.1174,
32
+ 0.205,
33
+ 0.9401
34
+ ],
35
+ "detection_confidence": 0.95,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_waiting_pedestrian.png",
38
+ "raw_ref_image": "references/raw_ref_waiting_pedestrian_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_waiting_pedestrian.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_waiting_pedestrian_attempt_01.png",
44
+ "output": "references/ref_waiting_pedestrian.png",
45
+ "mask": "references/sam_mask_waiting_pedestrian.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 348.0,
57
+ 48.0,
58
+ 704.0,
59
+ 1015.0
60
+ ],
61
+ "mask_score": 3.427649,
62
+ "mask_area_ratio": 0.155239,
63
+ "elapsed_seconds": 6.9951
64
+ }
65
+ },
66
+ {
67
+ "name": "black_sedan",
68
+ "is_person": false,
69
+ "subject_type": "object",
70
+ "source_set": "obj_set",
71
+ "source_image_id": "BDD100K:c0c183ff-1b24f541:object:7",
72
+ "source_name": "black sedan",
73
+ "source_description": "A black sedan driving in the right lane ahead of the red hatchback. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a multi-lane city street flanked by tall buildings, with various cars moving in the same direction and parked along the side.",
74
+ "sub_caption": "black sedan: A glossy black sedan with visible headlights and a detailed front grille.. Scene role: Approaching the crosswalk in the center traffic lane.",
75
+ "measured_bbox": [
76
+ 0.3895,
77
+ 0.2431,
78
+ 0.591,
79
+ 0.5084
80
+ ],
81
+ "detection_confidence": 0.99,
82
+ "ref_style": "white_bg_encyclopedia_photo",
83
+ "ref_image": "references/ref_black_sedan.png",
84
+ "raw_ref_image": "references/raw_ref_black_sedan_attempt_01.png",
85
+ "reference_verify": "references/reference_verify_black_sedan.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 1,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_black_sedan_attempt_01.png",
90
+ "output": "references/ref_black_sedan.png",
91
+ "mask": "references/sam_mask_black_sedan.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 0.0,
103
+ 221.0,
104
+ 1023.0,
105
+ 796.0
106
+ ],
107
+ "mask_score": 3.446312,
108
+ "mask_area_ratio": 0.340465,
109
+ "elapsed_seconds": 7.2258
110
+ }
111
+ },
112
+ {
113
+ "name": "silver_car",
114
+ "is_person": false,
115
+ "subject_type": "object",
116
+ "source_set": "obj_set",
117
+ "source_image_id": "BDD100K:be49ae7a-1ffaa683:object:2",
118
+ "source_name": "silver car",
119
+ "source_description": "A silver car driving in the adjacent right lane, slightly ahead of the white car. Source dataset: BDD100K. Scene context: View from inside a vehicle driving in heavy traffic on a multi-lane road under a clear sky.",
120
+ "sub_caption": "silver car: A metallic silver car reflecting daylight.. Scene role: Driving in the right lane slightly ahead of the black sedan, approaching the intersection.",
121
+ "measured_bbox": [
122
+ 0.6628,
123
+ 0.2419,
124
+ 0.9089,
125
+ 0.4999
126
+ ],
127
+ "detection_confidence": 0.99,
128
+ "ref_style": "white_bg_encyclopedia_photo",
129
+ "ref_image": "references/ref_silver_car.png",
130
+ "raw_ref_image": "references/raw_ref_silver_car_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_silver_car.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000003/references/raw_ref_silver_car_attempt_01.png",
136
+ "output": "references/ref_silver_car.png",
137
+ "mask": "references/sam_mask_silver_car.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 13.0,
149
+ 220.0,
150
+ 1011.0,
151
+ 811.0
152
+ ],
153
+ "mask_score": 3.077144,
154
+ "mask_area_ratio": 0.338042,
155
+ "elapsed_seconds": 7.0902
156
+ }
157
+ }
158
+ ],
159
+ "not_emitted": [],
160
+ "model_ids": {
161
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
162
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
163
+ }
164
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000004.json ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000004",
3
+ "target_total": 5,
4
+ "target_people": 1,
5
+ "target_objects": 4,
6
+ "canvas_size": [
7
+ 1248,
8
+ 832
9
+ ],
10
+ "canvas_aspect_ratio": "3:2",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 5,
17
+ "n_detected": 5,
18
+ "n_subjects": 5,
19
+ "subjects": [
20
+ {
21
+ "name": "walker",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "CrowdHuman:data/data_51/273275,145927000354f7525.jpg:person:10",
26
+ "source_name": "walker",
27
+ "source_description": "A man walking, wearing a blue hoodie with 'EMO' written on it, grey pants, and dark shoes. Source dataset: CrowdHuman. Scene context: A group of people, some wearing matching white t-shirts, are walking and jogging along a paved path next to a road, with a grey SUV parked on a grassy hill in the background.",
28
+ "sub_caption": "walker: A man walking, wearing a plain blue hoodie, grey pants, and dark shoes.. Scene role: Standing at the edge of the sidewalk near the crosswalk, waiting to cross the street.",
29
+ "measured_bbox": [
30
+ 0.7914,
31
+ 0.2893,
32
+ 0.834,
33
+ 0.4815
34
+ ],
35
+ "detection_confidence": 1.0,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_walker.png",
38
+ "raw_ref_image": "references/raw_ref_walker_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_walker.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_walker_attempt_01.png",
44
+ "output": "references/ref_walker.png",
45
+ "mask": "references/sam_mask_walker.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 336.0,
57
+ 51.0,
58
+ 688.0,
59
+ 1005.0
60
+ ],
61
+ "mask_score": 3.433924,
62
+ "mask_area_ratio": 0.16005,
63
+ "elapsed_seconds": 7.2846
64
+ }
65
+ },
66
+ {
67
+ "name": "traffic_light",
68
+ "is_person": false,
69
+ "subject_type": "object",
70
+ "source_set": "obj_set",
71
+ "source_image_id": "BDD100K:b6df605f-51c158b8:object:6",
72
+ "source_name": "traffic light",
73
+ "source_description": "A set of traffic lights suspended over the intersection, showing a red light. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a city street on a sunny day with a U-Haul truck in the opposite lane, parked cars along the right curb, and pedestrians crossing a crosswalk.",
74
+ "sub_caption": "traffic light: A set of traffic lights suspended over the intersection, showing a red light.. Scene role: Hanging high above the intersection in the upper-center of the frame.",
75
+ "measured_bbox": [
76
+ 0.4425,
77
+ 0.023,
78
+ 0.467,
79
+ 0.1052
80
+ ],
81
+ "detection_confidence": 0.99,
82
+ "ref_style": "white_bg_encyclopedia_photo",
83
+ "ref_image": "references/ref_traffic_light.png",
84
+ "raw_ref_image": "references/raw_ref_traffic_light_attempt_01.png",
85
+ "reference_verify": "references/reference_verify_traffic_light.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 1,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_traffic_light_attempt_01.png",
90
+ "output": "references/ref_traffic_light.png",
91
+ "mask": "references/sam_mask_traffic_light.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 160.0,
103
+ 93.0,
104
+ 864.0,
105
+ 930.0
106
+ ],
107
+ "mask_score": 3.437579,
108
+ "mask_area_ratio": 0.253583,
109
+ "elapsed_seconds": 7.0663
110
+ }
111
+ },
112
+ {
113
+ "name": "delivery_truck",
114
+ "is_person": false,
115
+ "subject_type": "object",
116
+ "source_set": "obj_set",
117
+ "source_image_id": "BDD100K:b3a7b21a-48bcf2b8:object:2",
118
+ "source_name": "delivery truck",
119
+ "source_description": "A large white box truck parked behind the fence on the right, with visible green and purple logos. Source dataset: BDD100K. Scene context: A view from a vehicle driving down a wide, paved road flanked by an industrial area with fences and delivery trucks under a cloudy sky.",
120
+ "sub_caption": "delivery truck: A large, plain white box delivery truck.. Scene role: Parked alongside the right curb in the background, past the intersection.",
121
+ "measured_bbox": [
122
+ 0.576,
123
+ 0.1929,
124
+ 0.7135,
125
+ 0.4081
126
+ ],
127
+ "detection_confidence": 0.99,
128
+ "ref_style": "white_bg_encyclopedia_photo",
129
+ "ref_image": "references/ref_delivery_truck.png",
130
+ "raw_ref_image": "references/raw_ref_delivery_truck_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_delivery_truck.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_delivery_truck_attempt_01.png",
136
+ "output": "references/ref_delivery_truck.png",
137
+ "mask": "references/sam_mask_delivery_truck.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 95.0,
149
+ 100.0,
150
+ 910.0,
151
+ 932.0
152
+ ],
153
+ "mask_score": 3.445823,
154
+ "mask_area_ratio": 0.476913,
155
+ "elapsed_seconds": 7.1923
156
+ }
157
+ },
158
+ {
159
+ "name": "dark_parked_car",
160
+ "is_person": false,
161
+ "subject_type": "object",
162
+ "source_set": "obj_set",
163
+ "source_image_id": "BDD100K:bcd37eef-1b958ae3:object:5",
164
+ "source_name": "dark parked car",
165
+ "source_description": "Another dark-colored car parked on the right side of the street. Source dataset: BDD100K. Scene context: A nighttime view from a vehicle driving down a multi-lane city street with parked cars on the right and oncoming traffic on the left.",
166
+ "sub_caption": "dark parked car: A dark-colored sedan.. Scene role: Parked on the right side of the street near the sidewalk in the mid-ground.",
167
+ "measured_bbox": [
168
+ 0.8414,
169
+ 0.3717,
170
+ 0.9967,
171
+ 0.7454
172
+ ],
173
+ "detection_confidence": 0.98,
174
+ "ref_style": "white_bg_encyclopedia_photo",
175
+ "ref_image": "references/ref_dark_parked_car.png",
176
+ "raw_ref_image": "references/raw_ref_dark_parked_car_attempt_01.png",
177
+ "reference_verify": "references/reference_verify_dark_parked_car.json",
178
+ "reference_verify_passed": true,
179
+ "reference_attempts": 1,
180
+ "sam_white_bg": {
181
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_dark_parked_car_attempt_01.png",
182
+ "output": "references/ref_dark_parked_car.png",
183
+ "mask": "references/sam_mask_dark_parked_car.png",
184
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
185
+ "sam_model_type": "vit_b",
186
+ "sam_device": "auto",
187
+ "sam_working_size": [
188
+ 640,
189
+ 640
190
+ ],
191
+ "sam_max_side": 640,
192
+ "sam_downscale": 0.625,
193
+ "prompt_box_xyxy": [
194
+ 19.0,
195
+ 336.0,
196
+ 1003.0,
197
+ 700.0
198
+ ],
199
+ "mask_score": 3.408233,
200
+ "mask_area_ratio": 0.181406,
201
+ "elapsed_seconds": 8.4178
202
+ }
203
+ },
204
+ {
205
+ "name": "street_lines",
206
+ "is_person": false,
207
+ "subject_type": "object",
208
+ "source_set": "obj_set",
209
+ "source_image_id": "BDD100K:c0c9ec9a-d3638a82:object:6",
210
+ "source_name": "street lines",
211
+ "source_description": "Double yellow lines separating traffic directions and white painted lines indicating lanes and crosswalks. Source dataset: BDD100K. Scene context: A view from inside a car driving on a city street, approaching an intersection underneath an elevated railway structure.",
212
+ "sub_caption": "street lines: Double yellow lines separating traffic directions and crisp white painted lines indicating lanes and a crosswalk.. Scene role: Painted on the asphalt road surface, extending from the foreground toward the intersection.",
213
+ "measured_bbox": [
214
+ 0.003,
215
+ 0.3541,
216
+ 0.915,
217
+ 0.8612
218
+ ],
219
+ "detection_confidence": 0.99,
220
+ "ref_style": "white_bg_encyclopedia_photo",
221
+ "ref_image": "references/ref_street_lines.png",
222
+ "raw_ref_image": "references/raw_ref_street_lines_attempt_01.png",
223
+ "reference_verify": "references/reference_verify_street_lines.json",
224
+ "reference_verify_passed": true,
225
+ "reference_attempts": 1,
226
+ "sam_white_bg": {
227
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000004/references/raw_ref_street_lines_attempt_01.png",
228
+ "output": "references/ref_street_lines.png",
229
+ "mask": "references/sam_mask_street_lines.png",
230
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
231
+ "sam_model_type": "vit_b",
232
+ "sam_device": "auto",
233
+ "sam_working_size": [
234
+ 640,
235
+ 640
236
+ ],
237
+ "sam_max_side": 640,
238
+ "sam_downscale": 0.625,
239
+ "prompt_box_xyxy": [
240
+ 38.0,
241
+ 225.0,
242
+ 985.0,
243
+ 799.0
244
+ ],
245
+ "mask_score": 3.287982,
246
+ "mask_area_ratio": 0.400985,
247
+ "elapsed_seconds": 7.2613
248
+ }
249
+ }
250
+ ],
251
+ "not_emitted": [],
252
+ "model_ids": {
253
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
254
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
255
+ }
256
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000005.json ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000005",
3
+ "target_total": 6,
4
+ "target_people": 3,
5
+ "target_objects": 3,
6
+ "canvas_size": [
7
+ 1248,
8
+ 832
9
+ ],
10
+ "canvas_aspect_ratio": "3:2",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 6,
17
+ "n_detected": 6,
18
+ "n_subjects": 6,
19
+ "subjects": [
20
+ {
21
+ "name": "firefighter",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "CrowdHuman:data/data_2/282555,e0af90003451118a.jpg:person:8",
26
+ "source_name": "firefighter",
27
+ "source_description": "Wearing a dark uniform with yellow reflective stripes and a white helmet, standing facing away near the fire truck. Source dataset: CrowdHuman. Scene context: Emergency response personnel, including firefighters and ambulance crew, are gathered outside a large classical building with pillars and banners, accompanied by emergency vehicles.",
28
+ "sub_caption": "firefighter: Wearing a dark uniform with yellow reflective stripes and a white helmet, standing facing away.. Scene role: Assisting with incident management, positioned near the stopped car and barrier.",
29
+ "measured_bbox": [
30
+ 0.2626,
31
+ 0.3463,
32
+ 0.3289,
33
+ 0.6561
34
+ ],
35
+ "detection_confidence": 0.99,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_firefighter.png",
38
+ "raw_ref_image": "references/raw_ref_firefighter_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_firefighter.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_firefighter_attempt_01.png",
44
+ "output": "references/ref_firefighter.png",
45
+ "mask": "references/sam_mask_firefighter.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 317.0,
57
+ 34.0,
58
+ 709.0,
59
+ 1009.0
60
+ ],
61
+ "mask_score": 3.445343,
62
+ "mask_area_ratio": 0.178691,
63
+ "elapsed_seconds": 7.0362
64
+ }
65
+ },
66
+ {
67
+ "name": "uniformed_officer",
68
+ "is_person": true,
69
+ "subject_type": "person",
70
+ "source_set": "people_set",
71
+ "source_image_id": "CrowdHuman:data/data_1/273275,f68c20007e0bf148.jpg:person:3",
72
+ "source_name": "uniformed officer",
73
+ "source_description": "wearing a khaki uniform and helmet, holding a baton, looking towards the left Source dataset: CrowdHuman. Scene context: A large crowd of people, including some in uniform with batons and helmets, stands in front of a red and yellow building.",
74
+ "sub_caption": "uniformed officer: Wearing a khaki uniform and helmet, holding a baton, looking towards the left.. Scene role: Directing surrounding traffic away from the stopped vehicle using a baton.",
75
+ "measured_bbox": [
76
+ 0.0497,
77
+ 0.3566,
78
+ 0.1691,
79
+ 0.6118
80
+ ],
81
+ "detection_confidence": 0.99,
82
+ "ref_style": "white_bg_full_body_front",
83
+ "ref_image": "references/ref_uniformed_officer.png",
84
+ "raw_ref_image": "references/raw_ref_uniformed_officer_attempt_01.png",
85
+ "reference_verify": "references/reference_verify_uniformed_officer.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 1,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_uniformed_officer_attempt_01.png",
90
+ "output": "references/ref_uniformed_officer.png",
91
+ "mask": "references/sam_mask_uniformed_officer.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 331.0,
103
+ 24.0,
104
+ 689.0,
105
+ 1005.0
106
+ ],
107
+ "mask_score": 3.475629,
108
+ "mask_area_ratio": 0.156165,
109
+ "elapsed_seconds": 7.0984
110
+ }
111
+ },
112
+ {
113
+ "name": "bystander_in_suit",
114
+ "is_person": true,
115
+ "subject_type": "person",
116
+ "source_set": "people_set",
117
+ "source_image_id": "CrowdHuman:data/data_28/273278,b62280001bda6f1a.jpg:person:19",
118
+ "source_name": "crowd member",
119
+ "source_description": "A person far right in the background wearing a suit. Source dataset: CrowdHuman. Scene context: A crowd of people gathers in front of an old stone building with a prominent arched doorway and two large animal statues.",
120
+ "sub_caption": "crowd member: A person wearing a suit.. Scene role: Standing off to the right side of the road behind the barrier, acting as the driver or an involved pedestrian.",
121
+ "measured_bbox": [
122
+ 0.7467,
123
+ 0.3318,
124
+ 0.8036,
125
+ 0.5111
126
+ ],
127
+ "detection_confidence": 0.98,
128
+ "ref_style": "white_bg_full_body_front",
129
+ "ref_image": "references/ref_bystander_in_suit.png",
130
+ "raw_ref_image": "references/raw_ref_bystander_in_suit_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_bystander_in_suit.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_bystander_in_suit_attempt_01.png",
136
+ "output": "references/ref_bystander_in_suit.png",
137
+ "mask": "references/sam_mask_bystander_in_suit.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 341.0,
149
+ 59.0,
150
+ 677.0,
151
+ 996.0
152
+ ],
153
+ "mask_score": 3.480669,
154
+ "mask_area_ratio": 0.144797,
155
+ "elapsed_seconds": 7.0242
156
+ }
157
+ },
158
+ {
159
+ "name": "traffic_light",
160
+ "is_person": false,
161
+ "subject_type": "object",
162
+ "source_set": "obj_set",
163
+ "source_image_id": "CrowdHuman:data/data_36/273275,6a11d000f52c34a9.jpg:object:0",
164
+ "source_name": "traffic light",
165
+ "source_description": "A black multi-lens traffic light fixture mounted on a pole above the street. Source dataset: CrowdHuman. Scene context: A male tour guide is speaking to a group of people standing on a city sidewalk next to a road crossing.",
166
+ "sub_caption": "traffic light: A black multi-lens traffic light fixture mounted on a pole above the street.. Scene role: Hanging overhead or mounted prominently on a pole at the intersection.",
167
+ "measured_bbox": [
168
+ 0.5381,
169
+ 0.0316,
170
+ 0.5856,
171
+ 0.2076
172
+ ],
173
+ "detection_confidence": 0.99,
174
+ "ref_style": "white_bg_encyclopedia_photo",
175
+ "ref_image": "references/ref_traffic_light.png",
176
+ "raw_ref_image": "references/raw_ref_traffic_light_attempt_01.png",
177
+ "reference_verify": "references/reference_verify_traffic_light.json",
178
+ "reference_verify_passed": true,
179
+ "reference_attempts": 1,
180
+ "sam_white_bg": {
181
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_traffic_light_attempt_01.png",
182
+ "output": "references/ref_traffic_light.png",
183
+ "mask": "references/sam_mask_traffic_light.png",
184
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
185
+ "sam_model_type": "vit_b",
186
+ "sam_device": "auto",
187
+ "sam_working_size": [
188
+ 640,
189
+ 640
190
+ ],
191
+ "sam_max_side": 640,
192
+ "sam_downscale": 0.625,
193
+ "prompt_box_xyxy": [
194
+ 272.0,
195
+ 15.0,
196
+ 750.0,
197
+ 1006.0
198
+ ],
199
+ "mask_score": 3.448339,
200
+ "mask_area_ratio": 0.303974,
201
+ "elapsed_seconds": 8.3734
202
+ }
203
+ },
204
+ {
205
+ "name": "concrete_barrier",
206
+ "is_person": false,
207
+ "subject_type": "object",
208
+ "source_set": "obj_set",
209
+ "source_image_id": "BDD100K:c946c532-07177e0a:object:11",
210
+ "source_name": "concrete barrier",
211
+ "source_description": "A continuous low concrete wall acting as a barrier on the right side of the road. Source dataset: BDD100K. Scene context: View from inside a vehicle driving on a multi-lane highway during the day, with construction or industrial sites visible alongside.",
212
+ "sub_caption": "concrete barrier: A continuous low concrete wall acting as a barrier on the right side of the road.. Scene role: Lining the right side of the street, separating the pedestrian walkway or construction zone from the active traffic lane.",
213
+ "measured_bbox": [
214
+ 0.6322,
215
+ 0.4972,
216
+ 0.9964,
217
+ 0.6985
218
+ ],
219
+ "detection_confidence": 0.99,
220
+ "ref_style": "white_bg_encyclopedia_photo",
221
+ "ref_image": "references/ref_concrete_barrier.png",
222
+ "raw_ref_image": "references/raw_ref_concrete_barrier_attempt_01.png",
223
+ "reference_verify": "references/reference_verify_concrete_barrier.json",
224
+ "reference_verify_passed": true,
225
+ "reference_attempts": 1,
226
+ "sam_white_bg": {
227
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_concrete_barrier_attempt_01.png",
228
+ "output": "references/ref_concrete_barrier.png",
229
+ "mask": "references/sam_mask_concrete_barrier.png",
230
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
231
+ "sam_model_type": "vit_b",
232
+ "sam_device": "auto",
233
+ "sam_working_size": [
234
+ 640,
235
+ 640
236
+ ],
237
+ "sam_max_side": 640,
238
+ "sam_downscale": 0.625,
239
+ "prompt_box_xyxy": [
240
+ 53.0,
241
+ 219.0,
242
+ 970.0,
243
+ 811.0
244
+ ],
245
+ "mask_score": 3.469119,
246
+ "mask_area_ratio": 0.3653,
247
+ "elapsed_seconds": 7.0274
248
+ }
249
+ },
250
+ {
251
+ "name": "silver_car",
252
+ "is_person": false,
253
+ "subject_type": "object",
254
+ "source_set": "obj_set",
255
+ "source_image_id": "BDD100K:be3d3a81-326a032d:object:0",
256
+ "source_name": "silver car",
257
+ "source_description": "A silver compact hatchback car facing forward, waiting at an intersection. Its brake lights are on. Source dataset: BDD100K. Scene context: A rainy street scene showing cars waiting at an intersection surrounded by tall buildings and urban infrastructure.",
258
+ "sub_caption": "silver car: A silver compact hatchback car facing forward, waiting at an intersection with illuminated brake lights.. Scene role: Stopped in the active lane near the barrier, serving as the focal point of the traffic response.",
259
+ "measured_bbox": [
260
+ 0.3396,
261
+ 0.3754,
262
+ 0.6399,
263
+ 0.6647
264
+ ],
265
+ "detection_confidence": 0.99,
266
+ "ref_style": "white_bg_encyclopedia_photo",
267
+ "ref_image": "references/ref_silver_car.png",
268
+ "raw_ref_image": "references/raw_ref_silver_car_attempt_01.png",
269
+ "reference_verify": "references/reference_verify_silver_car.json",
270
+ "reference_verify_passed": true,
271
+ "reference_attempts": 1,
272
+ "sam_white_bg": {
273
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000005/references/raw_ref_silver_car_attempt_01.png",
274
+ "output": "references/ref_silver_car.png",
275
+ "mask": "references/sam_mask_silver_car.png",
276
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
277
+ "sam_model_type": "vit_b",
278
+ "sam_device": "auto",
279
+ "sam_working_size": [
280
+ 640,
281
+ 640
282
+ ],
283
+ "sam_max_side": 640,
284
+ "sam_downscale": 0.625,
285
+ "prompt_box_xyxy": [
286
+ 46.0,
287
+ 215.0,
288
+ 976.0,
289
+ 829.0
290
+ ],
291
+ "mask_score": 3.457698,
292
+ "mask_area_ratio": 0.330622,
293
+ "elapsed_seconds": 7.0933
294
+ }
295
+ }
296
+ ],
297
+ "not_emitted": [],
298
+ "model_ids": {
299
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
300
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
301
+ }
302
+ }
samples_v8/driving/BDD100K_CityPersons_CrowdHuman_samples/rows/sample_000006.json ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_id": "sample_000006",
3
+ "target_total": 8,
4
+ "target_people": 1,
5
+ "target_objects": 7,
6
+ "canvas_size": [
7
+ 1280,
8
+ 720
9
+ ],
10
+ "canvas_aspect_ratio": "16:9",
11
+ "main_image": "main_image.png",
12
+ "bbox_overlay": "bbox_overlay.png",
13
+ "plan": "plan.json",
14
+ "detections": "detections.json",
15
+ "vocab_task": "vocab_task.json",
16
+ "n_planned": 8,
17
+ "n_detected": 8,
18
+ "n_subjects": 8,
19
+ "subjects": [
20
+ {
21
+ "name": "distant_pedestrian",
22
+ "is_person": true,
23
+ "subject_type": "person",
24
+ "source_set": "people_set",
25
+ "source_image_id": "CrowdHuman:data/data_73/283991,17cd800008079067.jpg:person:18",
26
+ "source_name": "pedestrian",
27
+ "source_description": "Another person in the distant background near the green structure. Source dataset: CrowdHuman. Scene context: A large crowd of people walking across a street with trees, streetlamps, and classic architecture in the background.",
28
+ "sub_caption": "pedestrian: A person walking across the street in the distant background.. Scene role: Crossing the crosswalk in the distance ahead of the approaching vehicles.",
29
+ "measured_bbox": [
30
+ 0.3877,
31
+ 0.478,
32
+ 0.4204,
33
+ 0.5881
34
+ ],
35
+ "detection_confidence": 0.9,
36
+ "ref_style": "white_bg_full_body_front",
37
+ "ref_image": "references/ref_distant_pedestrian.png",
38
+ "raw_ref_image": "references/raw_ref_distant_pedestrian_attempt_01.png",
39
+ "reference_verify": "references/reference_verify_distant_pedestrian.json",
40
+ "reference_verify_passed": true,
41
+ "reference_attempts": 1,
42
+ "sam_white_bg": {
43
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_distant_pedestrian_attempt_01.png",
44
+ "output": "references/ref_distant_pedestrian.png",
45
+ "mask": "references/sam_mask_distant_pedestrian.png",
46
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
47
+ "sam_model_type": "vit_b",
48
+ "sam_device": "auto",
49
+ "sam_working_size": [
50
+ 640,
51
+ 640
52
+ ],
53
+ "sam_max_side": 640,
54
+ "sam_downscale": 0.625,
55
+ "prompt_box_xyxy": [
56
+ 324.0,
57
+ 9.0,
58
+ 705.0,
59
+ 1015.0
60
+ ],
61
+ "mask_score": 3.338419,
62
+ "mask_area_ratio": 0.174056,
63
+ "elapsed_seconds": 8.694
64
+ }
65
+ },
66
+ {
67
+ "name": "vertical_illuminated_sign",
68
+ "is_person": false,
69
+ "subject_type": "object",
70
+ "source_set": "obj_set",
71
+ "source_image_id": "BDD100K:bb1b7e42-9608265e:object:6",
72
+ "source_name": "street sign",
73
+ "source_description": "A vertical 'PARK' sign illuminated on the right side of the street, indicating a parking garage. Source dataset: BDD100K. Scene context: A dashcam view from a vehicle driving down a city street with tall buildings on both sides, following a yellow taxi, with other cars parked and driving.",
74
+ "sub_caption": "street sign: A vertical illuminated neon sign with abstract shapes, glowing brightly.. Scene role: Mounted on the building facade on the right side of the street, adding ambient night lighting.",
75
+ "measured_bbox": [
76
+ 0.7683,
77
+ 0.0355,
78
+ 0.8177,
79
+ 0.2837
80
+ ],
81
+ "detection_confidence": 0.99,
82
+ "ref_style": "white_bg_encyclopedia_photo",
83
+ "ref_image": "references/ref_vertical_illuminated_sign.png",
84
+ "raw_ref_image": "references/raw_ref_vertical_illuminated_sign_attempt_01.png",
85
+ "reference_verify": "references/reference_verify_vertical_illuminated_sign.json",
86
+ "reference_verify_passed": true,
87
+ "reference_attempts": 1,
88
+ "sam_white_bg": {
89
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_vertical_illuminated_sign_attempt_01.png",
90
+ "output": "references/ref_vertical_illuminated_sign.png",
91
+ "mask": "references/sam_mask_vertical_illuminated_sign.png",
92
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
93
+ "sam_model_type": "vit_b",
94
+ "sam_device": "auto",
95
+ "sam_working_size": [
96
+ 640,
97
+ 640
98
+ ],
99
+ "sam_max_side": 640,
100
+ "sam_downscale": 0.625,
101
+ "prompt_box_xyxy": [
102
+ 305.0,
103
+ 20.0,
104
+ 728.0,
105
+ 1002.0
106
+ ],
107
+ "mask_score": 3.37343,
108
+ "mask_area_ratio": 0.273593,
109
+ "elapsed_seconds": 7.1332
110
+ }
111
+ },
112
+ {
113
+ "name": "emergency_vehicle",
114
+ "is_person": false,
115
+ "subject_type": "object",
116
+ "source_set": "obj_set",
117
+ "source_image_id": "BDD100K:b99f250d-886111c5:object:5",
118
+ "source_name": "vehicle",
119
+ "source_description": "A dark-colored vehicle partially visible in the left background with blue emergency lights flashing. Source dataset: BDD100K. Scene context: A nighttime city street intersection showing a crosswalk, construction barriers, and illuminated traffic signals.",
120
+ "sub_caption": "vehicle: A dark-colored vehicle with bright blue emergency lights flashing.. Scene role: Parked on the left side of the street near the intersection.",
121
+ "measured_bbox": [
122
+ 0.1031,
123
+ 0.4564,
124
+ 0.2827,
125
+ 0.6497
126
+ ],
127
+ "detection_confidence": 0.95,
128
+ "ref_style": "white_bg_encyclopedia_photo",
129
+ "ref_image": "references/ref_emergency_vehicle.png",
130
+ "raw_ref_image": "references/raw_ref_emergency_vehicle_attempt_01.png",
131
+ "reference_verify": "references/reference_verify_emergency_vehicle.json",
132
+ "reference_verify_passed": true,
133
+ "reference_attempts": 1,
134
+ "sam_white_bg": {
135
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_emergency_vehicle_attempt_01.png",
136
+ "output": "references/ref_emergency_vehicle.png",
137
+ "mask": "references/sam_mask_emergency_vehicle.png",
138
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
139
+ "sam_model_type": "vit_b",
140
+ "sam_device": "auto",
141
+ "sam_working_size": [
142
+ 640,
143
+ 640
144
+ ],
145
+ "sam_max_side": 640,
146
+ "sam_downscale": 0.625,
147
+ "prompt_box_xyxy": [
148
+ 8.0,
149
+ 237.0,
150
+ 1015.0,
151
+ 828.0
152
+ ],
153
+ "mask_score": 3.468468,
154
+ "mask_area_ratio": 0.355034,
155
+ "elapsed_seconds": 7.0896
156
+ }
157
+ },
158
+ {
159
+ "name": "white_panel_van",
160
+ "is_person": false,
161
+ "subject_type": "object",
162
+ "source_set": "obj_set",
163
+ "source_image_id": "BDD100K:b5047c50-e1facff6:object:2",
164
+ "source_name": "white van",
165
+ "source_description": "A large white panel van with red taillights illuminated, driving in the right lane ahead. Source dataset: BDD100K. Scene context: View from inside a vehicle driving down a multi-lane city street on a sunny day with moderate traffic.",
166
+ "sub_caption": "white van: A large white panel van with red taillights illuminated.. Scene role: Driving in the lane directly ahead of the camera perspective.",
167
+ "measured_bbox": [
168
+ 0.4556,
169
+ 0.3288,
170
+ 0.5926,
171
+ 0.6597
172
+ ],
173
+ "detection_confidence": 0.99,
174
+ "ref_style": "white_bg_encyclopedia_photo",
175
+ "ref_image": "references/ref_white_panel_van.png",
176
+ "raw_ref_image": "references/raw_ref_white_panel_van_attempt_01.png",
177
+ "reference_verify": "references/reference_verify_white_panel_van.json",
178
+ "reference_verify_passed": true,
179
+ "reference_attempts": 1,
180
+ "sam_white_bg": {
181
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_white_panel_van_attempt_01.png",
182
+ "output": "references/ref_white_panel_van.png",
183
+ "mask": "references/sam_mask_white_panel_van.png",
184
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
185
+ "sam_model_type": "vit_b",
186
+ "sam_device": "auto",
187
+ "sam_working_size": [
188
+ 640,
189
+ 640
190
+ ],
191
+ "sam_max_side": 640,
192
+ "sam_downscale": 0.625,
193
+ "prompt_box_xyxy": [
194
+ 181.0,
195
+ 63.0,
196
+ 843.0,
197
+ 937.0
198
+ ],
199
+ "mask_score": 2.636854,
200
+ "mask_area_ratio": 0.376409,
201
+ "elapsed_seconds": 7.1379
202
+ }
203
+ },
204
+ {
205
+ "name": "double_solid_line",
206
+ "is_person": false,
207
+ "subject_type": "object",
208
+ "source_set": "obj_set",
209
+ "source_image_id": "BDD100K:bc886d37-5b22c313:object:7",
210
+ "source_name": "double solid white line",
211
+ "source_description": "Two continuous white painted lines on the dark asphalt road surface, separating the two lanes of traffic. Source dataset: BDD100K. Scene context: A view from inside a car driving through a brightly lit tunnel with tiled walls, following a silver SUV and a dark compact car.",
212
+ "sub_caption": "double solid white line: Two continuous white painted lines on the dark asphalt road surface.. Scene role: Separating the traffic lanes on the dark road, leading toward the intersection.",
213
+ "measured_bbox": [
214
+ 0.1922,
215
+ 0.6133,
216
+ 0.4541,
217
+ 1.0
218
+ ],
219
+ "detection_confidence": 0.95,
220
+ "ref_style": "white_bg_encyclopedia_photo",
221
+ "ref_image": "references/ref_double_solid_line.png",
222
+ "raw_ref_image": "references/raw_ref_double_solid_line_attempt_01.png",
223
+ "reference_verify": "references/reference_verify_double_solid_line.json",
224
+ "reference_verify_passed": true,
225
+ "reference_attempts": 1,
226
+ "sam_white_bg": {
227
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_double_solid_line_attempt_01.png",
228
+ "output": "references/ref_double_solid_line.png",
229
+ "mask": "references/sam_mask_double_solid_line.png",
230
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
231
+ "sam_model_type": "vit_b",
232
+ "sam_device": "auto",
233
+ "sam_working_size": [
234
+ 640,
235
+ 640
236
+ ],
237
+ "sam_max_side": 640,
238
+ "sam_downscale": 0.625,
239
+ "prompt_box_xyxy": [
240
+ 22.0,
241
+ 186.0,
242
+ 1001.0,
243
+ 837.0
244
+ ],
245
+ "mask_score": 3.460181,
246
+ "mask_area_ratio": 0.372935,
247
+ "elapsed_seconds": 8.3174
248
+ }
249
+ },
250
+ {
251
+ "name": "dark_building_facade",
252
+ "is_person": false,
253
+ "subject_type": "object",
254
+ "source_set": "obj_set",
255
+ "source_image_id": "BDD100K:c807cb19-7e09cb11:object:8",
256
+ "source_name": "building facade",
257
+ "source_description": "Dark outlines of buildings lining the street on both sides, with some lit windows. Source dataset: BDD100K. Scene context: Nighttime driving view on a multi-lane city street with traffic lights and vehicles ahead.",
258
+ "sub_caption": "building facade: Dark outlines of buildings with scattered, warm-toned lit windows.. Scene role: Forming the urban backdrop along the left side of the street.",
259
+ "measured_bbox": [
260
+ 0.1397,
261
+ 0.0,
262
+ 0.366,
263
+ 0.5427
264
+ ],
265
+ "detection_confidence": 0.8,
266
+ "ref_style": "white_bg_encyclopedia_photo",
267
+ "ref_image": "references/ref_dark_building_facade.png",
268
+ "raw_ref_image": "references/raw_ref_dark_building_facade_attempt_01.png",
269
+ "reference_verify": "references/reference_verify_dark_building_facade.json",
270
+ "reference_verify_passed": true,
271
+ "reference_attempts": 1,
272
+ "sam_white_bg": {
273
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_dark_building_facade_attempt_01.png",
274
+ "output": "references/ref_dark_building_facade.png",
275
+ "mask": "references/sam_mask_dark_building_facade.png",
276
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
277
+ "sam_model_type": "vit_b",
278
+ "sam_device": "auto",
279
+ "sam_working_size": [
280
+ 640,
281
+ 640
282
+ ],
283
+ "sam_max_side": 640,
284
+ "sam_downscale": 0.625,
285
+ "prompt_box_xyxy": [
286
+ 128.0,
287
+ 0.0,
288
+ 887.0,
289
+ 1000.0
290
+ ],
291
+ "mask_score": 2.829968,
292
+ "mask_area_ratio": 0.624767,
293
+ "elapsed_seconds": 7.1675
294
+ }
295
+ },
296
+ {
297
+ "name": "awning_building_corner",
298
+ "is_person": false,
299
+ "subject_type": "object",
300
+ "source_set": "obj_set",
301
+ "source_image_id": "BDD100K:c06d23aa-cb9ae751:object:6",
302
+ "source_name": "building corner",
303
+ "source_description": "The corner of a building on the right side, with an orange or red awning and some lit signs. Source dataset: BDD100K. Scene context: Nighttime driving scene at an intersection with a stop sign and a large black SUV passing on the right.",
304
+ "sub_caption": "building corner: The corner of a building featuring an awning and brightly lit abstract signboards.. Scene role: Anchoring the right side of the intersection with a warm architectural glow.",
305
+ "measured_bbox": [
306
+ 0.6102,
307
+ 0.3347,
308
+ 0.7867,
309
+ 0.5412
310
+ ],
311
+ "detection_confidence": 0.9,
312
+ "ref_style": "white_bg_encyclopedia_photo",
313
+ "ref_image": "references/ref_awning_building_corner.png",
314
+ "raw_ref_image": "references/raw_ref_awning_building_corner_attempt_01.png",
315
+ "reference_verify": "references/reference_verify_awning_building_corner.json",
316
+ "reference_verify_passed": true,
317
+ "reference_attempts": 1,
318
+ "sam_white_bg": {
319
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_awning_building_corner_attempt_01.png",
320
+ "output": "references/ref_awning_building_corner.png",
321
+ "mask": "references/sam_mask_awning_building_corner.png",
322
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
323
+ "sam_model_type": "vit_b",
324
+ "sam_device": "auto",
325
+ "sam_working_size": [
326
+ 640,
327
+ 640
328
+ ],
329
+ "sam_max_side": 640,
330
+ "sam_downscale": 0.625,
331
+ "prompt_box_xyxy": [
332
+ 27.0,
333
+ 27.0,
334
+ 975.0,
335
+ 980.0
336
+ ],
337
+ "mask_score": 3.458235,
338
+ "mask_area_ratio": 0.594922,
339
+ "elapsed_seconds": 7.3072
340
+ }
341
+ },
342
+ {
343
+ "name": "green_street_sign",
344
+ "is_person": false,
345
+ "subject_type": "object",
346
+ "source_set": "obj_set",
347
+ "source_image_id": "CrowdHuman:data/data_50/273278,febe100057ca94db.jpg:object:5",
348
+ "source_name": "street sign",
349
+ "source_description": "A green street sign with white text visible on the left side. Source dataset: CrowdHuman. Scene context: Several pedestrians are crossing a street at a crosswalk on a sunny day.",
350
+ "sub_caption": "street sign: A standard green street sign without any readable text.. Scene role: Hanging from a traffic light pole near the intersection.",
351
+ "measured_bbox": [
352
+ 0.5754,
353
+ 0.1583,
354
+ 0.6522,
355
+ 0.1884
356
+ ],
357
+ "detection_confidence": 100,
358
+ "ref_style": "white_bg_encyclopedia_photo",
359
+ "ref_image": "references/ref_green_street_sign.png",
360
+ "raw_ref_image": "references/raw_ref_green_street_sign_attempt_01.png",
361
+ "reference_verify": "references/reference_verify_green_street_sign.json",
362
+ "reference_verify_passed": true,
363
+ "reference_attempts": 1,
364
+ "sam_white_bg": {
365
+ "input": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/driving/samples/sample_000006/references/raw_ref_green_street_sign_attempt_01.png",
366
+ "output": "references/ref_green_street_sign.png",
367
+ "mask": "references/sam_mask_green_street_sign.png",
368
+ "sam_checkpoint": "/lustre/fs12/portfolios/nvr/projects/nvr_elm_llm/users/hcai/dataset/tmp/lmlu/codex_gen_data/checkpoints/sam/sam_vit_b_01ec64.pth",
369
+ "sam_model_type": "vit_b",
370
+ "sam_device": "auto",
371
+ "sam_working_size": [
372
+ 640,
373
+ 640
374
+ ],
375
+ "sam_max_side": 640,
376
+ "sam_downscale": 0.625,
377
+ "prompt_box_xyxy": [
378
+ 61.0,
379
+ 378.0,
380
+ 962.0,
381
+ 645.0
382
+ ],
383
+ "mask_score": 3.379525,
384
+ "mask_area_ratio": 0.536634,
385
+ "elapsed_seconds": 7.1734
386
+ }
387
+ }
388
+ ],
389
+ "not_emitted": [],
390
+ "model_ids": {
391
+ "chat_model": "gcp/google/gemini-3.1-pro-preview",
392
+ "image_model": "gcp/google/gemini-3-pro-image-preview"
393
+ }
394
+ }