| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
"""Shared ``cp`` command to copy files between local paths, repositories and buckets.

This single command backs three identical CLI entry points: ``hf cp`` (top-level),
``hf repos cp`` and ``hf buckets cp``. It supports any source/destination combination
of local file, repo/bucket ``hf://`` URI, and ``-`` (stdin/stdout), with two exceptions:
- bucket-to-repo copies are not supported (server limitation), and
- local-to-local copies are not supported either (use a regular ``cp`` for that).
"""
|
|
| import os |
| import sys |
| from dataclasses import replace |
| from typing import Annotated |
|
|
| import typer |
|
|
| from huggingface_hub import HfApi |
| from huggingface_hub.utils import HfUri, SoftTemporaryDirectory, disable_progress_bars, is_hf_uri, parse_hf_uri |
|
|
| from ._cli_utils import TokenOpt, get_hf_api |
| from ._output import out |
|
|
|
|
# Example ``hf cp`` invocations, grouped by transfer direction.
CP_EXAMPLES = [
    # Download: repo/bucket -> local path (or "-" for stdout)
    "hf cp hf://username/my-model/config.json",
    "hf cp hf://username/my-model/config.json ./config.json",
    "hf cp hf://datasets/username/my-dataset/data.csv ./data/",
    "hf cp hf://buckets/username/my-bucket/config.json -",
    # Upload: local file (or "-" for stdin) -> repo/bucket
    "hf cp ./model.safetensors hf://username/my-model/model.safetensors",
    "hf cp ./config.json hf://buckets/username/my-bucket/logs/",
    "hf cp - hf://buckets/username/my-bucket/config.json",
    # Remote to remote: repo/bucket -> repo/bucket
    "hf cp hf://username/source-model/ hf://username/dest-model/",
    "hf cp hf://datasets/username/my-dataset/processed/ hf://buckets/username/my-bucket/processed/",
    "hf cp hf://buckets/username/my-bucket/logs/ hf://buckets/username/archive-bucket/ # copies contents only",
]
|
|
|
|
def cp(
    src: Annotated[
        str,
        typer.Argument(help="Source: local file, hf:// URI (repo or bucket), or - for stdin."),
    ],
    dst: Annotated[
        str | None,
        typer.Argument(help="Destination: local path, hf:// URI (repo or bucket), or - for stdout."),
    ] = None,
    token: TokenOpt = None,
) -> None:
    """Copy files between local paths, repositories, and buckets.

    Handles uploads (local/stdin -> repo/bucket), downloads (repo/bucket -> local/stdout) and
    remote-to-remote copies (repo/bucket -> repo/bucket). Bucket-to-repo and local-to-local
    copies are not supported. For directories, use `hf upload`/`hf download` (repos) or
    `hf buckets sync` (buckets).
    """
    api = get_hf_api(token=token)

    source_is_remote = is_hf_uri(src)
    target_is_remote = dst is not None and is_hf_uri(dst)

    # At least one side has to be an hf:// URI; otherwise there is nothing to transfer.
    if not (source_is_remote or target_is_remote):
        message = (
            "Missing destination. Provide a repo or bucket hf:// URI as DST."
            if dst is None
            else "One of SRC or DST must be a repo (hf://username/...) or bucket (hf://buckets/...) URI."
        )
        raise typer.BadParameter(message)

    if source_is_remote:
        if target_is_remote:
            # Remote -> remote: delegated entirely to the API client.
            assert dst is not None  # implied by target_is_remote; narrows the type
            api.copy_files(src, dst)
            out.result("Successfully copied", src=src, dst=dst)
        elif dst == "-":
            # Remote -> stdout.
            _download_file_to_stdout(api, src)
        else:
            # Remote -> local path (dst may be None, meaning "use the source file name").
            _download_file_to_local(api, src, dst)
        return

    # Only remaining combination: local file or stdin -> remote.
    assert dst is not None  # target_is_remote must be true here; narrows the type
    _upload_file_to_remote(api, src, dst, src_is_stdin=(src == "-"))
|
|
|
|
def _download_file_to_stdout(api: HfApi, src: str) -> None:
    """Download the single remote file at ``src`` and write its raw bytes to stdout.

    The file is first downloaded into a temporary directory, then streamed to the
    binary stdout buffer in chunks so it is never held in memory as a whole.

    Args:
        api: Client used to perform the download.
        src: ``hf://`` URI of the file to fetch (repo or bucket).

    Raises:
        typer.BadParameter: If ``src`` does not include a file name
            (raised by `_source_filename`).
    """
    uri = parse_hf_uri(src)
    filename = _source_filename(uri, src)

    # NOTE(review): progress bars are disabled for the whole download, presumably so
    # that nothing but the file content reaches the terminal/pipe — confirm.
    with disable_progress_bars(), SoftTemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, filename)
        _download_single(api, uri, tmp_path)

        stream = sys.stdout.buffer
        with open(tmp_path, "rb") as f:
            while chunk := f.read(32_000_000):
                stream.write(chunk)
        # Push the bytes out now instead of leaving them in the buffer until
        # interpreter shutdown; this also keeps ordering correct with respect to any
        # later text written through ``sys.stdout``.
        stream.flush()
|
|
|
|
def _download_file_to_local(api: HfApi, src: str, dst: str | None) -> None:
    """Download the single remote file at ``src`` to a local path and report the result.

    How the target path is chosen:
    - ``dst`` is ``None``: the source file name is used as a relative path.
    - ``dst`` is an existing directory, or ends with a path separator: the file is
      saved inside ``dst`` under the source file name.
    - otherwise: ``dst`` itself is the target file path.

    Any missing parent directory of the target is created before downloading.
    """
    uri = parse_hf_uri(src)
    filename = _source_filename(uri, src)

    if dst is None:
        target = filename
    else:
        into_directory = os.path.isdir(dst) or dst.endswith((os.sep, "/"))
        target = os.path.join(dst, filename) if into_directory else dst

    folder = os.path.dirname(target)
    if folder:
        os.makedirs(folder, exist_ok=True)

    _download_single(api, uri, target)
    out.result("Successfully downloaded", src=src, dst=target)
|
|
|
|
def _download_single(api: HfApi, uri: HfUri, local_path: str) -> None:
    """Fetch one remote file (repo or bucket) and place it at ``local_path``.

    Shared by `_download_file_to_local` and `_download_file_to_stdout`.
    """
    if uri.is_bucket:
        api.download_bucket_files(uri.id, [(uri.path_in_repo, local_path)])
        return

    # Repo files are downloaded into a scratch directory created next to the
    # destination and then moved onto ``local_path`` with ``os.replace``.
    # NOTE(review): presumably this keeps whatever layout/metadata the hub download
    # produces out of the destination folder, and keeps the final move on a single
    # filesystem — confirm.
    staging_root = os.path.dirname(local_path) or "."
    with SoftTemporaryDirectory(prefix=".tmp", dir=staging_root) as staging_dir:
        fetched = api.hf_hub_download(
            repo_id=uri.id,
            repo_type=uri.type,
            filename=uri.path_in_repo,
            revision=uri.revision,
            local_dir=staging_dir,
        )
        # Must happen before the scratch directory is cleaned up.
        os.replace(fetched, local_path)
|
|
|
|
def _source_filename(uri: HfUri, src: str) -> str:
    """Return the last path component of the remote source file.

    Raises ``typer.BadParameter`` when the source has an empty in-repo path or when
    ``src`` ends with ``/``, i.e. it designates a repo/bucket root or a directory
    rather than a file.
    """
    names_a_file = uri.path_in_repo != "" and not src.endswith("/")
    if not names_a_file:
        raise typer.BadParameter(
            "Source path must include a file name, not just a repo/bucket or directory path."
            " Use `hf download` or `hf buckets sync` to copy directories."
        )
    # Everything after the final "/" (or the whole path when there is none).
    _, _, basename = uri.path_in_repo.rpartition("/")
    return basename
|
|
|
|
def _upload_file_to_remote(api: HfApi, src: str, dst: str, *, src_is_stdin: bool) -> None:
    """Upload one local file, or the bytes read from stdin, to the remote ``dst``.

    Remote path resolution for a local file:
    - ``dst`` has no in-repo path: the local file's base name is used.
    - ``dst`` ends with ``/``: the base name is appended to the in-repo path.
    - otherwise: the in-repo path of ``dst`` is used as is.

    Raises ``typer.BadParameter`` when stdin is uploaded without a full destination
    file path, when ``src`` is a directory, or when ``src`` is not an existing file.
    """
    uri = parse_hf_uri(dst)
    dst_is_folder = dst.endswith("/")

    if src_is_stdin:
        # Stdin carries no file name, so the destination has to spell one out.
        if uri.path_in_repo == "" or dst_is_folder:
            raise typer.BadParameter("Stdin upload requires a full destination path including filename.")
        payload = sys.stdin.buffer.read()
        _upload_single(api, uri, payload, uri.path_in_repo)
        out.result("Successfully uploaded", src="stdin", dst=uri.to_uri())
        return

    if os.path.isdir(src):
        raise typer.BadParameter(
            "Source must be a file, not a directory. Use `hf upload` or `hf buckets sync` for directories."
        )
    if not os.path.isfile(src):
        raise typer.BadParameter(f"Source file not found: {src}")

    base_name = os.path.basename(src)
    remote_dir = uri.path_in_repo
    if remote_dir == "":
        remote_path = base_name
    elif dst_is_folder:
        remote_path = f"{remote_dir}/{base_name}"
    else:
        remote_path = remote_dir

    _upload_single(api, uri, src, remote_path)
    # Report the fully resolved destination, not the raw ``dst`` argument.
    final_uri = replace(uri, path_in_repo=remote_path)
    out.result("Successfully uploaded", src=src, dst=final_uri.to_uri())
|
|
|
|
def _upload_single(api: HfApi, uri: HfUri, source: str | bytes, remote_path: str) -> None:
    """Send one file path or in-memory bytes to ``remote_path`` in the target repo or bucket."""
    if not uri.is_bucket:
        api.upload_file(
            path_or_fileobj=source,
            path_in_repo=remote_path,
            repo_id=uri.id,
            repo_type=uri.type,
            revision=uri.revision,
        )
        return

    # Buckets take a batch of (source, destination) pairs; here the batch has one entry.
    api.batch_bucket_files(uri.id, add=[(source, remote_path)])
|
|