# Copyright 2026-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared ``cp`` command to copy files between local paths, repositories and buckets.
This single command backs three identical CLI entry points: ``hf cp`` (top-level),
``hf repos cp`` and ``hf buckets cp``. It supports any source/destination combination
of local file, repo/bucket ``hf://`` URI, and ``-`` (stdin/stdout), with two exceptions:
- bucket-to-repo copies are not supported (server limitation), and
- local-to-local copies (use a regular ``cp`` for that).
"""
import os
import sys
from dataclasses import replace
from typing import Annotated
import typer
from huggingface_hub import HfApi
from huggingface_hub.utils import HfUri, SoftTemporaryDirectory, disable_progress_bars, is_hf_uri, parse_hf_uri
from ._cli_utils import TokenOpt, get_hf_api
from ._output import out
# Example invocations of the `cp` command, grouped by copy direction.
CP_EXAMPLES = [
    # --- Downloads: remote (repo or bucket) source, local path or stdout destination ---
    "hf cp hf://username/my-model/config.json",
    "hf cp hf://username/my-model/config.json ./config.json",
    "hf cp hf://datasets/username/my-dataset/data.csv ./data/",
    "hf cp hf://buckets/username/my-bucket/config.json -",
    # --- Uploads: local file or stdin source, remote (repo or bucket) destination ---
    "hf cp ./model.safetensors hf://username/my-model/model.safetensors",
    "hf cp ./config.json hf://buckets/username/my-bucket/logs/",
    "hf cp - hf://buckets/username/my-bucket/config.json",
    # --- Remote to remote: both sides are hf:// URIs (server-side when possible) ---
    "hf cp hf://username/source-model/ hf://username/dest-model/",
    "hf cp hf://datasets/username/my-dataset/processed/ hf://buckets/username/my-bucket/processed/",
    "hf cp hf://buckets/username/my-bucket/logs/ hf://buckets/username/archive-bucket/ # copies contents only",
]
def cp(
    src: Annotated[
        str,
        typer.Argument(help="Source: local file, hf:// URI (repo or bucket), or - for stdin."),
    ],
    dst: Annotated[
        str | None,
        typer.Argument(help="Destination: local path, hf:// URI (repo or bucket), or - for stdout."),
    ] = None,
    token: TokenOpt = None,
) -> None:
    """Copy files between local paths, repositories, and buckets.

    Handles uploads (local/stdin -> repo/bucket), downloads (repo/bucket -> local/stdout) and
    remote-to-remote copies (repo/bucket -> repo/bucket). Bucket-to-repo and local-to-local
    copies are not supported. For directories, use `hf upload`/`hf download` (repos) or
    `hf buckets sync` (buckets).
    """
    api = get_hf_api(token=token)

    # Classify both endpoints once; every branch below is chosen from these two flags.
    source_is_remote = is_hf_uri(src)
    target_is_remote = dst is not None and is_hf_uri(dst)

    # Neither side is an hf:// URI: local->local, stdin->local, local->stdout, or no DST at all.
    if not (source_is_remote or target_is_remote):
        if dst is None:
            raise typer.BadParameter("Missing destination. Provide a repo or bucket hf:// URI as DST.")
        raise typer.BadParameter(
            "One of SRC or DST must be a repo (hf://username/...) or bucket (hf://buckets/...) URI."
        )

    if source_is_remote and target_is_remote:
        # Remote to remote: hand the whole copy over to `copy_files`.
        assert dst is not None  # implied by target_is_remote
        api.copy_files(src, dst)
        out.result("Successfully copied", src=src, dst=dst)
    elif source_is_remote:
        # Download: "-" as destination means stdout, anything else (or nothing) is a local path.
        if dst == "-":
            _download_file_to_stdout(api, src)
        else:
            _download_file_to_local(api, src, dst)
    else:
        # Upload: the only remaining combination is a local file (or stdin) to a remote URI.
        assert dst is not None  # implied by target_is_remote
        _upload_file_to_remote(api, src, dst, src_is_stdin=(src == "-"))
def _download_file_to_stdout(api: HfApi, src: str) -> None:
    """Download the single remote file at ``src`` and write its raw bytes to stdout.

    The file is first downloaded into a temporary directory, then copied to
    ``sys.stdout.buffer`` in fixed-size chunks.
    """
    read_size = 32_000_000  # 32MB per read

    remote = parse_hf_uri(src)
    name = _source_filename(remote, src)

    # Progress bars are disabled so they do not pollute the piped output.
    with disable_progress_bars(), SoftTemporaryDirectory() as scratch_dir:
        scratch_file = os.path.join(scratch_dir, name)
        _download_single(api, remote, scratch_file)
        with open(scratch_file, "rb") as stream:
            # `iter` with a sentinel stops at the first empty read, i.e. at end of file.
            for piece in iter(lambda: stream.read(read_size), b""):
                sys.stdout.buffer.write(piece)
def _download_file_to_local(api: HfApi, src: str, dst: str | None) -> None:
    """Download the single remote file at ``src`` to a local destination.

    Destination resolution:
    - ``dst`` is ``None``: save under the source file name, as a relative path.
    - ``dst`` is an existing directory or ends with a path separator: save inside
      it under the source file name.
    - otherwise: ``dst`` is used as the full target file path.

    Missing parent directories of the target are created before downloading.
    """
    remote = parse_hf_uri(src)
    name = _source_filename(remote, src)

    if dst is None:
        target = name
    else:
        # A trailing separator marks a directory even if it does not exist yet.
        into_directory = os.path.isdir(dst) or dst.endswith((os.sep, "/"))
        target = os.path.join(dst, name) if into_directory else dst

    target_parent = os.path.dirname(target)
    if target_parent:
        os.makedirs(target_parent, exist_ok=True)

    _download_single(api, remote, target)
    out.result("Successfully downloaded", src=src, dst=target)
def _download_single(api: HfApi, uri: HfUri, local_path: str) -> None:
    """Fetch one file, from a repo or a bucket, and place it at ``local_path``.

    Shared by `_download_file_to_local` and `_download_file_to_stdout`.
    """
    if uri.is_bucket:
        # Bucket files are written straight to the requested path.
        api.download_bucket_files(uri.id, [(uri.path_in_repo, local_path)])
        return

    # Repo files are staged in a temporary folder created beside the destination
    # (instead of the shared cache) so that the final move stays on the same
    # filesystem. The staging folder is removed once the `with` block exits.
    staging_root = os.path.dirname(local_path)
    if not staging_root:
        staging_root = "."
    with SoftTemporaryDirectory(prefix=".tmp", dir=staging_root) as staging_dir:
        fetched_path = api.hf_hub_download(
            repo_id=uri.id,
            repo_type=uri.type,
            filename=uri.path_in_repo,
            revision=uri.revision,
            local_dir=staging_dir,
        )
        os.replace(fetched_path, local_path)
def _source_filename(uri: HfUri, src: str) -> str:
    """Return the final path component of the remote source file.

    Raises:
        typer.BadParameter: if the URI has no path inside the repo/bucket, or if
            ``src`` ends with ``/`` (i.e. it designates a directory).
    """
    remote_path = uri.path_in_repo
    points_to_directory = remote_path == "" or src.endswith("/")
    if points_to_directory:
        raise typer.BadParameter(
            "Source path must include a file name, not just a repo/bucket or directory path."
            " Use `hf download` or `hf buckets sync` to copy directories."
        )
    # Everything after the last "/" (or the whole path when there is none).
    _, _, name = remote_path.rpartition("/")
    return name
def _upload_file_to_remote(api: HfApi, src: str, dst: str, *, src_is_stdin: bool) -> None:
    """Upload a local file, or the bytes read from stdin, to a repo or bucket URI.

    For stdin, ``dst`` must name the full remote file path. For a local file, the
    remote path is derived from ``dst``:
    - no path inside the repo/bucket: use the local file's base name;
    - ``dst`` ends with ``/``: place the file under that prefix, keeping its base name;
    - otherwise: use the path given in ``dst`` as is.

    Raises:
        typer.BadParameter: if the stdin destination lacks a file name, or if
            ``src`` is a directory or does not exist.
    """
    target = parse_hf_uri(dst)
    target_path = target.path_in_repo
    dst_has_trailing_slash = dst.endswith("/")

    if src_is_stdin:
        # Stdin carries no file name, so the destination has to provide one.
        if target_path == "" or dst_has_trailing_slash:
            raise typer.BadParameter("Stdin upload requires a full destination path including filename.")
        payload = sys.stdin.buffer.read()
        _upload_single(api, target, payload, target_path)
        out.result("Successfully uploaded", src="stdin", dst=target.to_uri())
        return

    if os.path.isdir(src):
        raise typer.BadParameter(
            "Source must be a file, not a directory. Use `hf upload` or `hf buckets sync` for directories."
        )
    if not os.path.isfile(src):
        raise typer.BadParameter(f"Source file not found: {src}")

    local_name = os.path.basename(src)
    if target_path == "":
        remote_path = local_name
    elif dst_has_trailing_slash:
        remote_path = target_path + "/" + local_name
    else:
        remote_path = target_path

    _upload_single(api, target, src, remote_path)
    uploaded_to = replace(target, path_in_repo=remote_path)
    out.result("Successfully uploaded", src=src, dst=uploaded_to.to_uri())
def _upload_single(api: HfApi, uri: HfUri, source: str | bytes, remote_path: str) -> None:
    """Send one file path or in-memory bytes object to ``remote_path`` in a repo or bucket."""
    if uri.is_bucket:
        # Buckets take a batch of (source, remote path) pairs; here the batch has one entry.
        api.batch_bucket_files(uri.id, add=[(source, remote_path)])
        return

    api.upload_file(
        path_or_fileobj=source,
        path_in_repo=remote_path,
        repo_id=uri.id,
        repo_type=uri.type,
        revision=uri.revision,
    )
|