| """
|
| Vector Memory Module
|
| Memória de longo prazo usando ChromaDB + Sentence Transformers
|
| """
|
|
|
| import chromadb
|
| from sentence_transformers import SentenceTransformer
|
| from datetime import datetime
|
| import hashlib
|
|
|
|
|
| EMBEDDING_MODEL = "all-MiniLM-L6-v2"  # Model name passed to SentenceTransformer in VectorMemory.__init__ and reported by get_stats().
|
|
|
|
|
| _memory_instance = None  # Module-level cache for the shared VectorMemory; assigned by get_memory() on its first call.
|
|
|
|
|
def get_memory():
    """Return the module-wide VectorMemory, constructing it on first use.

    The instance is cached in the module-level ``_memory_instance`` so that
    later calls hand back the same object instead of building a new one.
    """
    global _memory_instance

    # Fast path: an instance was already built by an earlier call.
    if _memory_instance is not None:
        return _memory_instance

    _memory_instance = VectorMemory()
    return _memory_instance
|
|
|
|
|
class VectorMemory:
    """Conversation memory backed by a ChromaDB collection and sentence embeddings.

    Each memory is one user/assistant exchange. The combined text is embedded
    with the SentenceTransformer model named by ``EMBEDDING_MODEL`` and stored
    in the ``chat_memory`` collection, which is configured for cosine space.

    NOTE(review): ``chromadb.Client()`` appears to be the default in-process
    client; confirm whether memories are expected to survive a restart.
    """

    # Single source of truth for the collection name, shared by
    # ``_open_collection`` and ``clear_memories``.
    _COLLECTION_NAME = "chat_memory"

    def __init__(self):
        """Load the embedding model and open (or create) the collection."""
        print("Inicializando memória vetorial...")

        self.model = SentenceTransformer(EMBEDDING_MODEL)
        print(f"Modelo carregado: {EMBEDDING_MODEL}")

        self.client = chromadb.Client()
        self.collection = self._open_collection()

        print(f"Memória pronta. {self.collection.count()} memórias existentes.")

    def _open_collection(self):
        """Get or create the chat-memory collection with cosine distance."""
        return self.client.get_or_create_collection(
            name=self._COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"},
        )

    def _generate_id(self, text: str) -> str:
        """Return a hex MD5 digest of the current timestamp plus ``text``.

        The timestamp is mixed in so that storing the same text at different
        moments yields different IDs.
        """
        timestamp = datetime.now().isoformat()
        content = f"{timestamp}:{text}"
        return hashlib.md5(content.encode()).hexdigest()

    def add_memory(self, user_message: str, bot_response: str) -> None:
        """Store one conversation exchange.

        The user message and bot response are joined into a single labelled
        document; that combined text is what gets embedded and searched.

        Args:
            user_message: What the user said.
            bot_response: What the assistant answered.
        """
        combined = f"Usuário: {user_message}\nAssistente: {bot_response}"

        embedding = self.model.encode(combined).tolist()
        doc_id = self._generate_id(combined)

        # Metadata keeps only the first 500 characters of each side; the full
        # text is still available in the stored document.
        metadata = {
            "user_message": user_message[:500],
            "bot_response": bot_response[:500],
            "timestamp": datetime.now().isoformat(),
        }

        self.collection.add(
            ids=[doc_id],
            embeddings=[embedding],
            documents=[combined],
            metadatas=[metadata],
        )

        print(f"Memória adicionada. Total: {self.collection.count()}")

    def search_memories(self, query: str, k: int = 3) -> list[dict]:
        """Return up to ``k`` stored memories closest to ``query``.

        Args:
            query: Free text to embed and match against stored exchanges.
            k: Maximum number of memories to return. Values <= 0 yield ``[]``.

        Returns:
            A list of dicts with keys ``text``, ``user_message``,
            ``bot_response``, ``score`` and ``timestamp``. ``score`` is
            ``1 - distance`` as reported by the collection query. The list is
            empty when nothing is stored or ``k`` is not positive.
        """
        total = self.collection.count()
        # Guard before encoding: a non-positive n_results is not a meaningful
        # query, and there is nothing to search in an empty collection.
        if total == 0 or k <= 0:
            return []

        query_embedding = self.model.encode(query).tolist()

        results = self.collection.query(
            query_embeddings=[query_embedding],
            # Never ask for more results than there are stored memories.
            n_results=min(k, total),
        )

        if not results or not results["documents"] or not results["documents"][0]:
            return []

        # Only one query embedding was sent, so only the first result row matters.
        documents = results["documents"][0]
        metadatas = results["metadatas"][0] if results["metadatas"] else None
        distances = results["distances"][0] if results["distances"] else None

        memories = []
        for i, doc in enumerate(documents):
            # Tolerate missing metadata lists and None per-document entries.
            metadata = (metadatas[i] if metadatas else None) or {}
            distance = distances[i] if distances else 0

            memories.append({
                "text": doc,
                "user_message": metadata.get("user_message", ""),
                "bot_response": metadata.get("bot_response", ""),
                "score": 1 - distance,
                "timestamp": metadata.get("timestamp", ""),
            })

        return memories

    def clear_memories(self) -> None:
        """Delete every stored memory by dropping and recreating the collection."""
        self.client.delete_collection(self._COLLECTION_NAME)
        self.collection = self._open_collection()
        print("Memórias limpas.")

    def get_stats(self) -> dict:
        """Return the stored-memory count and the embedding model name."""
        return {
            "total_memories": self.collection.count(),
            "model": EMBEDDING_MODEL,
        }
|
|
|