File size: 6,187 Bytes
24f3fb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
"""
Simple Qdrant Collection Backup Script
Backs up collection metadata and all points to JSON files
"""

from qdrant_client import QdrantClient
from core.clients import get_qdrant
from core.config import QDRANT_COLLECTION
from qdrant_client.http.models import PointStruct

import json
import os
from datetime import datetime
from qdrant_client import QdrantClient
from qdrant_client.models import Filter



def _distance_name(distance):
    """Return a distance metric as a plain string suitable for JSON."""
    # Enum-like objects expose the wire name via ``.value``; anything else
    # is stringified as-is.
    return distance.value if hasattr(distance, "value") else str(distance)


def _vector_params_to_dict(vector_params):
    """Reduce one vector configuration to its size and distance metric."""
    return {
        "size": vector_params.size,
        "distance": _distance_name(vector_params.distance),
    }


def _collection_info_to_dict(collection_name, collection_info):
    """Build the JSON-serializable metadata document for a collection."""
    params = collection_info.config.params
    vectors_config = params.vectors

    if vectors_config is None:
        # No dense vector configuration to describe (presumably a
        # sparse-only collection); record an empty mapping instead of
        # failing on ``None.size``.
        vectors = {}
    elif isinstance(vectors_config, dict):
        # Named vectors: one entry per vector name.
        vectors = {
            name: _vector_params_to_dict(vector_params)
            for name, vector_params in vectors_config.items()
        }
    else:
        # Single unnamed vector, stored under the key "default".
        vectors = {"default": _vector_params_to_dict(vectors_config)}

    return {
        "collection_name": collection_name,
        "vectors_config": vectors,
        "config": {
            "replication_factor": params.replication_factor,
            "write_consistency_factor": params.write_consistency_factor,
            "shard_number": params.shard_number,
        },
    }


def _vector_to_jsonable(vector):
    """Convert a single vector value into something ``json.dump`` accepts."""
    # Model-style objects (e.g. sparse vectors) are dumped to a dict of their
    # fields. Calling ``list()`` on them would not yield the vector data.
    for dumper_name in ("model_dump", "dict"):
        dumper = getattr(vector, dumper_name, None)
        if callable(dumper):
            return dumper()
    if hasattr(vector, "__iter__"):
        return list(vector)
    return vector


def _point_to_dict(point):
    """Convert one scrolled point into a JSON-serializable dict."""
    raw_vector = point.vector
    if isinstance(raw_vector, dict):
        # Named vectors; names whose value is None are omitted.
        vector = {
            name: _vector_to_jsonable(data)
            for name, data in raw_vector.items()
            if data is not None
        }
    elif raw_vector is not None:
        vector = _vector_to_jsonable(raw_vector)
    else:
        # A point without any vector is stored with an empty mapping.
        vector = {}
    return {"id": point.id, "payload": point.payload, "vector": vector}


def _iter_point_batches(client, collection_name, batch_size=100):
    """Yield successive batches of points until the scroll is exhausted."""
    offset = None
    while True:
        points, next_offset = client.scroll(
            collection_name=collection_name,
            limit=batch_size,
            offset=offset,
            with_payload=True,
            with_vectors=True,
        )
        if not points:
            return
        yield points
        # A missing next offset marks the last page.
        if next_offset is None:
            return
        offset = next_offset


def _write_json(path, data):
    """Write ``data`` to ``path`` as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def backup_collection(client, collection_name, backup_dir="./backups"):
    """
    Backup a Qdrant collection to JSON files

    Creates ``<backup_dir>/<collection_name>_<timestamp>/`` containing
    ``collection_info.json`` (vector configuration and basic collection
    parameters), ``points.json`` (every point with id, payload and vectors)
    and ``backup_summary.json``. Progress is reported on stdout.

    Args:
        client: QdrantClient instance
        collection_name: Name of collection to backup
        backup_dir: Directory to save backup files

    Returns:
        Path of the created backup directory, or None if the backup failed.
        Failures after the directory has been created are reported on stdout
        rather than raised; files written before the failure are left in
        place.
    """
    # Create backup directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    collection_backup_dir = os.path.join(backup_dir, f"{collection_name}_{timestamp}")
    os.makedirs(collection_backup_dir, exist_ok=True)

    print(f"Starting backup of collection '{collection_name}'...")

    try:
        # 1. Backup collection info. The document is built before the file is
        # opened so a conversion error cannot leave an empty file behind.
        collection_info = client.get_collection(collection_name)
        info_dict = _collection_info_to_dict(collection_name, collection_info)
        _write_json(
            os.path.join(collection_backup_dir, "collection_info.json"),
            info_dict,
        )
        print("✓ Collection metadata backed up")

        # 2. Backup all points
        all_points = []
        for batch in _iter_point_batches(client, collection_name):
            all_points.extend(_point_to_dict(point) for point in batch)
            print(f"Backed up {len(all_points)} points...")

        _write_json(os.path.join(collection_backup_dir, "points.json"), all_points)
        print(f"✓ {len(all_points)} points backed up")

        # 3. Create backup summary
        summary = {
            "collection_name": collection_name,
            "backup_timestamp": timestamp,
            "total_points": len(all_points),
            "backup_files": ["collection_info.json", "points.json"],
        }
        _write_json(
            os.path.join(collection_backup_dir, "backup_summary.json"),
            summary,
        )

        print("✓ Backup completed successfully!")
        print(f"Backup location: {collection_backup_dir}")
        print("Files created:")
        print("  - collection_info.json (metadata)")
        print(f"  - points.json ({len(all_points)} points)")
        print("  - backup_summary.json (summary)")

        return collection_backup_dir

    except Exception as e:
        # Top-level boundary of the backup: callers rely on a None result
        # (not an exception) to detect failure.
        print(f"❌ Backup failed: {str(e)}")
        return None


def main():
    """
    List the available collections, then back up the configured one.

    The client comes from ``get_qdrant()`` and the collection name from
    ``QDRANT_COLLECTION``. Progress and the final outcome are printed to
    stdout.

    Returns:
        0 if the backup succeeded, 1 if it failed, so the value can be used
        directly as a process exit status.
    """
    # Initialize client
    client = get_qdrant()

    # List available collections
    collections = client.get_collections()
    print("Available collections:")
    for collection in collections.collections:
        print(f"  - {collection.name}")

    # Backup the specified collection
    backup_dir = backup_collection(client, QDRANT_COLLECTION)

    if backup_dir:
        print(f"\n🎉 Backup successful! Files saved to: {backup_dir}")
        return 0

    print("\n❌ Backup failed!")
    return 1


if __name__ == "__main__":
    # Exit non-zero on failure so schedulers and shell scripts can detect a
    # failed backup instead of always seeing status 0.
    raise SystemExit(main())