cache
Full name: tenets.core.nlp.cache
cache¶
Embedding cache management.
This module provides caching for embeddings so that expensive embedding computations are not repeated.
Classes¶
EmbeddingCache¶
Cache for embedding vectors.
Uses a two-level cache:

1. Memory cache for hot embeddings
2. Disk cache for persistence
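A minimal usage sketch (the cache directory, text, and model name below are illustrative, not library defaults):

```python
from pathlib import Path

import numpy as np

from tenets.core.nlp.cache import EmbeddingCache

# Hot embeddings stay in memory; everything is also persisted to disk.
cache = EmbeddingCache(Path("~/.tenets/cache").expanduser(), max_memory_items=500, ttl_days=7)

embedding = cache.get("how does auth work?", model_name="all-MiniLM-L6-v2")
if embedding is None:
    # Not cached yet: compute it with your embedding model, then store it.
    embedding = np.random.rand(384).astype(np.float32)  # placeholder for a real model call
    cache.put("how does auth work?", embedding, model_name="all-MiniLM-L6-v2")
```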
Initialize embedding cache.
PARAMETER | DESCRIPTION |
---|---|
cache_dir | Directory for disk cache TYPE: Path |
max_memory_items | Maximum items in memory cache TYPE: int |
ttl_days | Time to live for cached embeddings TYPE: int |
Source code in tenets/core/nlp/cache.py
```python
def __init__(self, cache_dir: Path, max_memory_items: int = 1000, ttl_days: int = 30):
    """Initialize embedding cache.

    Args:
        cache_dir: Directory for disk cache
        max_memory_items: Maximum items in memory cache
        ttl_days: Time to live for cached embeddings
    """
    self.logger = get_logger(__name__)
    self.cache_dir = Path(cache_dir)
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Memory cache
    self._memory_cache: Dict[str, np.ndarray] = {}
    self._access_order: list[str] = []
    self.max_memory_items = max_memory_items

    # Disk cache
    self.disk_cache = DiskCache(self.cache_dir, name="embeddings")
    self.ttl_seconds = ttl_days * 24 * 3600
```
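Because entries live in the disk cache until their TTL (ttl_days * 24 * 3600 seconds) expires, a second instance pointed at the same directory should be able to serve embeddings written earlier. A hedged sketch, assuming DiskCache writes entries through to disk as they are added:

```python
from pathlib import Path

import numpy as np

from tenets.core.nlp.cache import EmbeddingCache

cache_a = EmbeddingCache(Path("/tmp/tenets-emb"))
cache_a.put("query", np.ones(8, dtype=np.float32))

# A fresh instance has an empty memory cache but shares the same disk cache,
# so the entry written above should still be retrievable (and is promoted
# back into memory on the first hit).
cache_b = EmbeddingCache(Path("/tmp/tenets-emb"))
assert cache_b.get("query") is not None
```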
Functions¶
get¶
Get cached embedding.
PARAMETER | DESCRIPTION |
---|---|
text | Text that was embedded TYPE: str |
model_name | Model used for embedding TYPE: str |

RETURNS | DESCRIPTION |
---|---|
Optional[ndarray] | Cached embedding or None |
Source code in tenets/core/nlp/cache.py
```python
def get(self, text: str, model_name: str = "default") -> Optional[np.ndarray]:
    """Get cached embedding.

    Args:
        text: Text that was embedded
        model_name: Model used for embedding

    Returns:
        Cached embedding or None
    """
    key = self._make_key(text, model_name)

    # Check memory cache
    if key in self._memory_cache:
        # Move to end (LRU)
        if key in self._access_order:
            self._access_order.remove(key)
        self._access_order.append(key)
        return self._memory_cache[key]

    # Check disk cache
    cached = self.disk_cache.get(key)
    if cached is not None:
        # Validate it's an embedding
        if isinstance(cached, np.ndarray):
            # Promote to memory cache
            self._add_to_memory(key, cached)
            return cached
        else:
            self.logger.warning(f"Invalid cached embedding for {key}")
            self.disk_cache.delete(key)

    return None
```
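get relies on two private helpers that are not documented on this page, _make_key and _add_to_memory. The sketch below shows one plausible shape for them, illustrative only and not the actual tenets implementation: a content hash that includes the model name, and LRU eviction bounded by max_memory_items.

```python
import hashlib
from typing import Dict, List

import numpy as np


class _EmbeddingCacheHelpers:
    """Illustrative only: one plausible shape for the private helpers used by get()."""

    _memory_cache: Dict[str, np.ndarray]
    _access_order: List[str]
    max_memory_items: int

    def _make_key(self, text: str, model_name: str) -> str:
        # Key on both the text and the model, so the same text embedded by two
        # different models does not collide.
        digest = hashlib.sha256(f"{model_name}:{text}".encode("utf-8")).hexdigest()
        return f"emb_{model_name}_{digest[:16]}"

    def _add_to_memory(self, key: str, embedding: np.ndarray) -> None:
        # Evict the least recently used entry once the memory cache is full.
        if key not in self._memory_cache and len(self._memory_cache) >= self.max_memory_items:
            oldest = self._access_order.pop(0)
            self._memory_cache.pop(oldest, None)
        self._memory_cache[key] = embedding
        if key in self._access_order:
            self._access_order.remove(key)
        self._access_order.append(key)
```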
put¶
Cache an embedding.
PARAMETER | DESCRIPTION |
---|---|
text | Text that was embedded TYPE: str |
embedding | Embedding vector TYPE: ndarray |
model_name | Model used TYPE: str |
Source code in tenets/core/nlp/cache.py
```python
def put(self, text: str, embedding: np.ndarray, model_name: str = "default"):
    """Cache an embedding.

    Args:
        text: Text that was embedded
        embedding: Embedding vector
        model_name: Model used
    """
    key = self._make_key(text, model_name)

    # Add to memory cache
    self._add_to_memory(key, embedding)

    # Add to disk cache
    self.disk_cache.put(
        key,
        embedding,
        ttl=self.ttl_seconds,
        metadata={"model": model_name, "dim": embedding.shape[0], "text_preview": text[:100]},
    )
```
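Since the cache key is built from both the text and the model name, caching the same text under two models should produce two independent entries. A small sketch under that assumption, continuing the `cache` instance from the first example (vector sizes and model names are illustrative):

```python
# Illustrative placeholder vectors for two different embedding models.
vec_small = np.zeros(384, dtype=np.float32)
vec_large = np.zeros(768, dtype=np.float32)

cache.put("def parse(tokens): ...", vec_small, model_name="all-MiniLM-L6-v2")
cache.put("def parse(tokens): ...", vec_large, model_name="all-mpnet-base-v2")

# Each (text, model) pair resolves to its own cache entry.
assert cache.get("def parse(tokens): ...", model_name="all-MiniLM-L6-v2").shape == (384,)
assert cache.get("def parse(tokens): ...", model_name="all-mpnet-base-v2").shape == (768,)
```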
get_batch¶
Get multiple cached embeddings.
PARAMETER | DESCRIPTION |
---|---|
texts | List of texts TYPE: list[str] |
model_name | Model used TYPE: str |

RETURNS | DESCRIPTION |
---|---|
Dict[str, Optional[ndarray]] | Dict mapping text to embedding (or None if not cached) |
Source code in tenets/core/nlp/cache.py
```python
def get_batch(
    self, texts: list[str], model_name: str = "default"
) -> Dict[str, Optional[np.ndarray]]:
    """Get multiple cached embeddings.

    Args:
        texts: List of texts
        model_name: Model used

    Returns:
        Dict mapping text to embedding (or None if not cached)
    """
    results = {}
    for text in texts:
        results[text] = self.get(text, model_name)
    return results
```
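A typical use of get_batch is to find which texts still need an embedding call; only the entries that come back as None have to be recomputed. A short sketch, continuing the `cache` instance from the earlier examples:

```python
texts = ["first snippet", "second snippet", "third snippet"]
cached = cache.get_batch(texts)  # model_name defaults to "default"

# Only texts that came back as None still need an (expensive) embedding call.
missing = [text for text, emb in cached.items() if emb is None]
```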
put_batch¶
Cache multiple embeddings.
PARAMETER | DESCRIPTION |
---|---|
embeddings | Dict mapping text to embedding TYPE: Dict[str, ndarray] |
model_name | Model used TYPE: str |
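put_batch presumably accepts a dict mapping each text to its newly computed vector; that shape is inferred from the parameter table above, since the source listing is not shown here. Continuing the get_batch sketch:

```python
import numpy as np

# np.random stands in for a real embedding model; the 384-dim size is illustrative.
new_embeddings = {text: np.random.rand(384).astype(np.float32) for text in missing}
cache.put_batch(new_embeddings, model_name="default")
```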