Image-to-Video
Diffusers
Safetensors
English
video-generation
diffusion
4d-control
camera-control
object-motion
world-model
Instructions to use TencentARC/VerseCrafter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use TencentARC/VerseCrafter with Diffusers:
```bash
pip install -U diffusers transformers accelerate
```

```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image, export_to_video

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("TencentARC/VerseCrafter", dtype=torch.bfloat16, device_map="cuda")
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "output.mp4")
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "_class_name": "VerseCrafterWanTransformer3DModel",
  "_diffusers_version": "0.30.1",
  "add_control_adapter": false,
  "add_ref_conv": false,
  "cross_attn_norm": true,
  "cross_attn_type": null,
  "dim": 5120,
  "downscale_factor_control_adapter": 8,
  "eps": 1e-06,
  "ffn_dim": 13824,
  "freq_dim": 256,
  "hidden_size": 2048,
  "in_channels": 16,
  "in_dim": 16,
  "in_dim_control_adapter": 24,
  "in_dim_ref_conv": 16,
  "model_type": "t2v",
  "num_heads": 40,
  "num_layers": 40,
  "out_dim": 16,
  "patch_size": [
    1,
    2,
    2
  ],
  "qk_norm": true,
  "text_dim": 4096,
  "text_len": 512,
  "geoada_in_dim": 128,
  "geoada_layers": [
    0,
    5,
    10,
    15,
    20,
    25,
    30,
    35
  ],
  "window_size": [
    -1,
    -1
  ]
}
```