Wauplin's picture
download
raw
8.45 kB
# Copyright 2026-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared ``cp`` command to copy files between local paths, repositories and buckets.
This single command backs three identical CLI entry points: ``hf cp`` (top-level),
``hf repos cp`` and ``hf buckets cp``. It supports any source/destination combination
of local file, repo/bucket ``hf://`` URI, and ``-`` (stdin/stdout), with two exceptions:
- bucket-to-repo copies are not supported (server limitation), and
- local-to-local copies are not supported either (use a regular ``cp`` for that).
"""
import os
import sys
from dataclasses import replace
from typing import Annotated
import typer
from huggingface_hub import HfApi
from huggingface_hub.utils import HfUri, SoftTemporaryDirectory, disable_progress_bars, is_hf_uri, parse_hf_uri
from ._cli_utils import TokenOpt, get_hf_api
from ._output import out
# Example invocations of the ``cp`` command, grouped by transfer direction.
# The three groups are concatenated in order into a single flat list.
CP_EXAMPLES = (
    # Download (repo or bucket -> local / stdout)
    [
        "hf cp hf://username/my-model/config.json",
        "hf cp hf://username/my-model/config.json ./config.json",
        "hf cp hf://datasets/username/my-dataset/data.csv ./data/",
        "hf cp hf://buckets/username/my-bucket/config.json -",
    ]
    # Upload (local / stdin -> repo or bucket)
    + [
        "hf cp ./model.safetensors hf://username/my-model/model.safetensors",
        "hf cp ./config.json hf://buckets/username/my-bucket/logs/",
        "hf cp - hf://buckets/username/my-bucket/config.json",
    ]
    # Remote to remote (repo/bucket -> repo/bucket, server-side when possible)
    + [
        "hf cp hf://username/source-model/ hf://username/dest-model/",
        "hf cp hf://datasets/username/my-dataset/processed/ hf://buckets/username/my-bucket/processed/",
        "hf cp hf://buckets/username/my-bucket/logs/ hf://buckets/username/archive-bucket/ # copies contents only",
    ]
)
def cp(
    src: Annotated[
        str,
        typer.Argument(help="Source: local file, hf:// URI (repo or bucket), or - for stdin."),
    ],
    dst: Annotated[
        str | None,
        typer.Argument(help="Destination: local path, hf:// URI (repo or bucket), or - for stdout."),
    ] = None,
    token: TokenOpt = None,
) -> None:
    """Copy files between local paths, repositories, and buckets.
    Handles uploads (local/stdin -> repo/bucket), downloads (repo/bucket -> local/stdout) and
    remote-to-remote copies (repo/bucket -> repo/bucket). Bucket-to-repo and local-to-local
    copies are not supported. For directories, use `hf upload`/`hf download` (repos) or
    `hf buckets sync` (buckets).
    """
    api = get_hf_api(token=token)

    # Classify each endpoint: an hf:// URI is "remote"; anything else is a local path or "-".
    source_is_remote = is_hf_uri(src)
    target_is_remote = is_hf_uri(dst) if dst is not None else False

    # Reject every combination without a remote side (local->local, stdin->local, ...).
    if not (source_is_remote or target_is_remote):
        if dst is None:
            raise typer.BadParameter("Missing destination. Provide a repo or bucket hf:// URI as DST.")
        raise typer.BadParameter(
            "One of SRC or DST must be a repo (hf://username/...) or bucket (hf://buckets/...) URI."
        )

    if source_is_remote and target_is_remote:
        # Remote to remote: hand both URIs to `copy_files` (repo/bucket -> repo/bucket).
        assert dst is not None  # implied by target_is_remote
        api.copy_files(src, dst)
        out.result("Successfully copied", src=src, dst=dst)
    elif source_is_remote:
        # Download: repo/bucket -> stdout when DST is "-", otherwise a local path.
        if dst == "-":
            _download_file_to_stdout(api, src)
        else:
            _download_file_to_local(api, src, dst)
    else:
        # Upload: local file or stdin ("-") -> repo/bucket.
        assert dst is not None  # only the destination can be the remote side here
        _upload_file_to_remote(api, src, dst, src_is_stdin=(src == "-"))
def _download_file_to_stdout(api: HfApi, src: str) -> None:
    """Write the content of a single remote file to standard output.

    The file is first downloaded into a temporary directory and then copied to
    ``sys.stdout.buffer`` in binary chunks.

    Args:
        api: Client used to perform the download.
        src: ``hf://`` URI of the file to stream.

    Raises:
        typer.BadParameter: If ``src`` does not point to a file (see `_source_filename`).
    """
    parsed = parse_hf_uri(src)
    basename = _source_filename(parsed, src)

    # Progress bars are disabled so they cannot pollute the piped output.
    with disable_progress_bars(), SoftTemporaryDirectory() as scratch_dir:
        staged_path = os.path.join(scratch_dir, basename)
        _download_single(api, parsed, staged_path)
        with open(staged_path, "rb") as stream:
            # Read 32MB at a time until the empty-bytes sentinel marks end of file.
            for chunk in iter(lambda: stream.read(32_000_000), b""):
                sys.stdout.buffer.write(chunk)
def _download_file_to_local(api: HfApi, src: str, dst: str | None) -> None:
    """Download a single remote file to the local filesystem and report the result.

    The local target is resolved as follows:
    - ``dst`` is ``None``: the remote file name, relative to the current directory;
    - ``dst`` is an existing directory or ends with a path separator: the remote
      file name inside ``dst``;
    - otherwise: ``dst`` itself.

    Missing parent directories of the target are created.

    Args:
        api: Client used to perform the download.
        src: ``hf://`` URI of the file to download.
        dst: Local destination path, or ``None``.

    Raises:
        typer.BadParameter: If ``src`` does not point to a file (see `_source_filename`).
    """
    parsed = parse_hf_uri(src)
    basename = _source_filename(parsed, src)

    if dst is None:
        target = basename
    else:
        # A trailing separator designates a directory even if it does not exist yet.
        dst_is_folder = os.path.isdir(dst) or dst.endswith((os.sep, "/"))
        target = os.path.join(dst, basename) if dst_is_folder else dst

    folder = os.path.dirname(target)
    if folder != "":
        os.makedirs(folder, exist_ok=True)

    _download_single(api, parsed, target)
    out.result("Successfully downloaded", src=src, dst=target)
def _download_single(api: HfApi, uri: HfUri, local_path: str) -> None:
    """Fetch one file, from a repo or a bucket, and place it at ``local_path``.

    Shared by `_download_file_to_local` and `_download_file_to_stdout`.

    Args:
        api: Client used to perform the download.
        uri: Parsed ``hf://`` URI identifying the remote file.
        local_path: Local path the file must end up at.
    """
    if uri.is_bucket:
        api.download_bucket_files(uri.id, [(uri.path_in_repo, local_path)])
        return

    # Repo file: stage the download in a temporary folder created beside the destination
    # (instead of the shared cache), so the final move stays on one filesystem and is
    # instant. The staging folder is removed automatically after the move.
    staging_root = os.path.dirname(local_path) or "."
    with SoftTemporaryDirectory(prefix=".tmp", dir=staging_root) as staging_dir:
        fetched_path = api.hf_hub_download(
            repo_id=uri.id,
            repo_type=uri.type,
            filename=uri.path_in_repo,
            revision=uri.revision,
            local_dir=staging_dir,
        )
        os.replace(fetched_path, local_path)
def _source_filename(uri: HfUri, src: str) -> str:
    """Return the file name (last ``/``-separated component) of a remote source.

    Args:
        uri: Parsed form of ``src``.
        src: Raw source string as given by the user; a trailing ``/`` marks a directory.

    Returns:
        The part of ``uri.path_in_repo`` after its last ``/``.

    Raises:
        typer.BadParameter: If the path inside the repo/bucket is empty or ``src``
            ends with ``/``.
    """
    remote_path = uri.path_in_repo
    names_a_file = remote_path != "" and not src.endswith("/")
    if not names_a_file:
        raise typer.BadParameter(
            "Source path must include a file name, not just a repo/bucket or directory path."
            " Use `hf download` or `hf buckets sync` to copy directories."
        )
    _, _, basename = remote_path.rpartition("/")
    return basename
def _upload_file_to_remote(api: HfApi, src: str, dst: str, *, src_is_stdin: bool) -> None:
    """Upload a local file, or the bytes read from stdin, to a repo or bucket.

    For a local file the remote path is resolved as follows:
    - ``dst`` has no path inside the repo/bucket: the local file name;
    - ``dst`` ends with ``/``: the local file name under that remote folder;
    - otherwise: the path given in ``dst``.

    Args:
        api: Client used to perform the upload.
        src: Local file path (ignored when ``src_is_stdin`` is true).
        dst: ``hf://`` URI of the destination.
        src_is_stdin: When true, the content is read from ``sys.stdin.buffer``.

    Raises:
        typer.BadParameter: If a stdin upload has no explicit destination file name,
            or if ``src`` is a directory or does not exist.
    """
    target = parse_hf_uri(dst)
    dst_is_folder = dst.endswith("/")

    if src_is_stdin:
        # There is no local file name to fall back on, so DST must name the file.
        if target.path_in_repo == "" or dst_is_folder:
            raise typer.BadParameter("Stdin upload requires a full destination path including filename.")
        payload = sys.stdin.buffer.read()
        _upload_single(api, target, payload, target.path_in_repo)
        out.result("Successfully uploaded", src="stdin", dst=target.to_uri())
        return

    if os.path.isdir(src):
        raise typer.BadParameter(
            "Source must be a file, not a directory. Use `hf upload` or `hf buckets sync` for directories."
        )
    if not os.path.isfile(src):
        raise typer.BadParameter(f"Source file not found: {src}")

    remote_folder = target.path_in_repo
    local_name = os.path.basename(src)
    if remote_folder == "":
        remote_path = local_name
    elif dst_is_folder:
        remote_path = f"{remote_folder}/{local_name}"
    else:
        remote_path = remote_folder

    _upload_single(api, target, src, remote_path)
    # Report the resolved destination, which may include the inferred file name.
    resolved = replace(target, path_in_repo=remote_path)
    out.result("Successfully uploaded", src=src, dst=resolved.to_uri())
def _upload_single(api: HfApi, uri: HfUri, source: str | bytes, remote_path: str) -> None:
    """Send one file path or one bytes payload to a repo or a bucket.

    Args:
        api: Client used to perform the upload.
        uri: Parsed ``hf://`` URI of the destination repo or bucket.
        source: Local file path, or raw bytes to upload.
        remote_path: Path of the file inside the repo or bucket.
    """
    if not uri.is_bucket:
        # Repo destination: the upload targets the repo type and revision of the URI.
        repo_target = {
            "repo_id": uri.id,
            "repo_type": uri.type,
            "revision": uri.revision,
        }
        api.upload_file(path_or_fileobj=source, path_in_repo=remote_path, **repo_target)
        return

    # Bucket destination: a single-entry batch add.
    api.batch_bucket_files(uri.id, add=[(source, remote_path)])

Xet Storage Details

Size:
8.45 kB
·
Xet hash:
1cd402d03c80d6b356f7a7dd6e8cbed8eed88b2d7263bd1a7fccaa129f944c5a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.