cache¶
Full name: tenets.utils.cache
Caching utilities for tenets.
Provides LRU caching with TTL support and file-based caching for expensive computations such as embeddings and ranking scores.
Classes¶
CacheEntry dataclass¶
CacheEntry(value: T, created_at: float = time.time(), last_accessed: float = time.time(), access_count: int = 0, size_bytes: int = 0)
LRUCache¶
Bases: Generic[T]
Thread-safe LRU cache with optional TTL.
Provides efficient caching with automatic eviction of least recently used entries when capacity is reached.
| ATTRIBUTE | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of entries |
| `ttl_seconds` | Time-to-live in seconds (0 = no expiration) |
| `hits` | Number of cache hits |
| `misses` | Number of cache misses |
Initialize the cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of entries to store. TYPE: `int`, DEFAULT: `1000` |
| `ttl_seconds` | Time-to-live for entries (0 = no expiration). TYPE: `float`, DEFAULT: `0` |
| `name` | Name for logging/debugging. TYPE: `str`, DEFAULT: `"cache"` |
Source code in tenets/utils/cache.py
```python
def __init__(
    self,
    max_size: int = 1000,
    ttl_seconds: float = 0,
    name: str = "cache",
):
    """Initialize the cache.

    Args:
        max_size: Maximum number of entries to store
        ttl_seconds: Time-to-live for entries (0 = no expiration)
        name: Name for logging/debugging
    """
    self.max_size = max_size
    self.ttl_seconds = ttl_seconds
    self.name = name
    self._cache: Dict[str, CacheEntry[T]] = {}
    self._lock = threading.RLock()

    # Statistics
    self.hits = 0
    self.misses = 0
```
Attributes¶
Functions¶
get¶
Get value from cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `key` | Cache key. TYPE: `str` |

| RETURNS | DESCRIPTION |
|---|---|
| `Optional[T]` | Cached value or None if not found/expired |
Source code in tenets/utils/cache.py
```python
def get(self, key: str) -> Optional[T]:
    """Get value from cache.

    Args:
        key: Cache key

    Returns:
        Cached value or None if not found/expired
    """
    with self._lock:
        entry = self._cache.get(key)

        if entry is None:
            self.misses += 1
            return None

        if entry.is_expired(self.ttl_seconds):
            del self._cache[key]
            self.misses += 1
            return None

        entry.touch()
        self.hits += 1
        return entry.value
```
set¶
Set value in cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `key` | Cache key. TYPE: `str` |
| `value` | Value to cache. TYPE: `T` |
| `size_bytes` | Optional size estimate for memory tracking. TYPE: `int`, DEFAULT: `0` |
Source code in tenets/utils/cache.py
```python
def set(self, key: str, value: T, size_bytes: int = 0) -> None:
    """Set value in cache.

    Args:
        key: Cache key
        value: Value to cache
        size_bytes: Optional size estimate for memory tracking
    """
    with self._lock:
        # Evict if at capacity
        if len(self._cache) >= self.max_size and key not in self._cache:
            self._evict_lru()

        self._cache[key] = CacheEntry(
            value=value,
            size_bytes=size_bytes,
        )
```
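Example (illustrative, not from the source): a minimal round trip through `set`/`get`, assuming `_evict_lru` (not expanded on this page) drops the least-recently-accessed entry, as the class description states.

```python
from tenets.utils.cache import LRUCache

cache: LRUCache[str] = LRUCache(max_size=2, name="demo")
cache.set("a", "alpha")
cache.set("b", "beta")
cache.get("a")           # hit: refreshes "a"'s recency via entry.touch()
cache.set("c", "gamma")  # at capacity: evicts the LRU entry ("b")

assert cache.get("b") is None   # miss: "b" was evicted
assert cache.get("c") == "gamma"
```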
delete¶
stats¶
Get cache statistics.
| RETURNS | DESCRIPTION |
|---|---|
| `Dict[str, Any]` | Dictionary with cache statistics |
Source code in tenets/utils/cache.py
```python
def stats(self) -> Dict[str, Any]:
    """Get cache statistics.

    Returns:
        Dictionary with cache statistics
    """
    with self._lock:
        total_size = sum(e.size_bytes for e in self._cache.values())

        return {
            "name": self.name,
            "size": self.size,
            "max_size": self.max_size,
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate": self.hit_rate,
            "total_bytes": total_size,
            "ttl_seconds": self.ttl_seconds,
        }
```
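Example (illustrative): reading the statistics dictionary. The `size` and `hit_rate` fields come from properties not expanded on this page, so the exact `hit_rate` formula (presumably `hits / (hits + misses)`) is an assumption.

```python
from tenets.utils.cache import LRUCache

c: LRUCache[int] = LRUCache(max_size=10, name="stats_demo")
c.set("x", 1)
c.get("x")        # one hit
c.get("missing")  # one miss

s = c.stats()
assert s["hits"] == 1 and s["misses"] == 1
# hit_rate is presumably hits / (hits + misses), i.e. 0.5 here
```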
FileContentCache¶
Cache for file contents with modification tracking.
Automatically invalidates cache entries when files are modified.
| ATTRIBUTE | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of files to cache |
| `max_file_size` | Maximum file size to cache (bytes) |
Initialize file content cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of files to cache. TYPE: `int`, DEFAULT: `500` |
| `max_file_size` | Maximum file size to cache in bytes. TYPE: `int`, DEFAULT: `1024 * 1024` (1 MB) |
Source code in tenets/utils/cache.py
```python
def __init__(
    self,
    max_size: int = 500,
    max_file_size: int = 1024 * 1024,  # 1MB default
):
    """Initialize file content cache.

    Args:
        max_size: Maximum number of files to cache
        max_file_size: Maximum file size to cache in bytes
    """
    self._cache: LRUCache[tuple[float, str]] = LRUCache(
        max_size=max_size,
        name="file_content",
    )
    self.max_file_size = max_file_size
```
Functions¶
get¶
Get cached file content if still valid.
| PARAMETER | DESCRIPTION |
|---|---|
| `path` | Path to file. TYPE: `Path` |

| RETURNS | DESCRIPTION |
|---|---|
| `Optional[str]` | File content or None if not cached/stale |
Source code in tenets/utils/cache.py
```python
def get(self, path: Path) -> Optional[str]:
    """Get cached file content if still valid.

    Args:
        path: Path to file

    Returns:
        File content or None if not cached/stale
    """
    key = str(path.resolve())
    entry = self._cache.get(key)

    if entry is None:
        return None

    mtime, content = entry

    # Check if file was modified
    try:
        current_mtime = path.stat().st_mtime
        if current_mtime > mtime:
            self._cache.delete(key)
            return None
    except OSError:
        self._cache.delete(key)
        return None

    return content
```
set¶
Cache file content.
| PARAMETER | DESCRIPTION |
|---|---|
| `path` | Path to file. TYPE: `Path` |
| `content` | File content. TYPE: `str` |

| RETURNS | DESCRIPTION |
|---|---|
| `bool` | True if cached, False if file too large |
Source code in tenets/utils/cache.py
```python
def set(self, path: Path, content: str) -> bool:
    """Cache file content.

    Args:
        path: Path to file
        content: File content

    Returns:
        True if cached, False if file too large
    """
    if len(content) > self.max_file_size:
        return False

    key = str(path.resolve())
    try:
        mtime = path.stat().st_mtime
    except OSError:
        mtime = time.time()

    self._cache.set(key, (mtime, content), size_bytes=len(content))
    return True
```
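Example (illustrative): a round trip showing the mtime-based invalidation. The scratch file and the `os.utime` call are scaffolding for the demo; only the `get`/`set` calls come from this page.

```python
import os
import time
from pathlib import Path

from tenets.utils.cache import FileContentCache

fcache = FileContentCache(max_size=100)
path = Path("example.txt")
path.write_text("hello")

fcache.set(path, path.read_text())  # stores (mtime, content)
assert fcache.get(path) == "hello"  # mtime unchanged -> cache hit

# Force a newer mtime so the staleness check trips deterministically
os.utime(path, (time.time() + 10, time.time() + 10))
assert fcache.get(path) is None     # stale entry is dropped

path.unlink()  # clean up the scratch file
```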
invalidate¶
stats¶
EmbeddingCache¶
Cache for text embeddings.
Stores computed embeddings to avoid re-computation for repeated queries or unchanged files.
| ATTRIBUTE | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of embeddings to cache |
| `ttl_seconds` | Time-to-live for cached embeddings |
Initialize embedding cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of embeddings to cache. TYPE: `int`, DEFAULT: `2000` |
| `ttl_seconds` | Time-to-live for cached embeddings. TYPE: `float`, DEFAULT: `3600` (1 hour) |
Source code in tenets/utils/cache.py
```python
def __init__(
    self,
    max_size: int = 2000,
    ttl_seconds: float = 3600,  # 1 hour default
):
    """Initialize embedding cache.

    Args:
        max_size: Maximum number of embeddings to cache
        ttl_seconds: Time-to-live for cached embeddings
    """
    self._cache: LRUCache[list[float]] = LRUCache(
        max_size=max_size,
        ttl_seconds=ttl_seconds,
        name="embeddings",
    )
```
Functions¶
get¶
set¶
Cache an embedding.
| PARAMETER | DESCRIPTION |
|---|---|
| `text` | Text that was embedded. TYPE: `str` |
| `embedding` | Embedding vector. TYPE: `list[float]` |
| `model` | Model used for embedding. TYPE: `str`, DEFAULT: `"default"` |
Source code in tenets/utils/cache.py
```python
def set(
    self,
    text: str,
    embedding: list[float],
    model: str = "default",
) -> None:
    """Cache an embedding.

    Args:
        text: Text that was embedded
        embedding: Embedding vector
        model: Model used for embedding
    """
    key = cache_key(text[:500], model)
    self._cache.set(key, embedding, size_bytes=len(embedding) * 8)
```
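Example (illustrative): caching and retrieving an embedding. The vector is a stand-in, and because `get` is collapsed on this page, its signature (mirroring `set`'s keying on the first 500 characters of text plus the model name) is an assumption.

```python
from tenets.utils.cache import EmbeddingCache

ecache = EmbeddingCache(max_size=2000, ttl_seconds=3600)

vector = [0.1, 0.2, 0.3]  # stand-in embedding vector
ecache.set("some query text", vector, model="default")

# Assumed to mirror set(): keyed on text[:500] plus the model name
cached = ecache.get("some query text", model="default")
```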
stats¶
RankingScoreCache¶
Cache for file ranking scores.
Caches ranking scores for file-prompt pairs to speed up repeated queries on unchanged files.
| ATTRIBUTE | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of scores to cache |
| `ttl_seconds` | Time-to-live for cached scores |
Initialize ranking score cache.
| PARAMETER | DESCRIPTION |
|---|---|
| `max_size` | Maximum number of scores to cache. TYPE: `int`, DEFAULT: `5000` |
| `ttl_seconds` | Time-to-live for cached scores. TYPE: `float`, DEFAULT: `300` (5 minutes) |
Source code in tenets/utils/cache.py
```python
def __init__(
    self,
    max_size: int = 5000,
    ttl_seconds: float = 300,  # 5 minutes default
):
    """Initialize ranking score cache.

    Args:
        max_size: Maximum number of scores to cache
        ttl_seconds: Time-to-live for cached scores
    """
    self._cache: LRUCache[Dict[str, Any]] = LRUCache(
        max_size=max_size,
        ttl_seconds=ttl_seconds,
        name="ranking_scores",
    )
```
Functions¶
get¶
get(file_path: Path, prompt_hash: str, file_mtime: float, algorithm: str = 'balanced') -> Optional[Dict[str, Any]]
Get cached ranking score.
| PARAMETER | DESCRIPTION |
|---|---|
| `file_path` | Path to file. TYPE: `Path` |
| `prompt_hash` | Hash of the prompt. TYPE: `str` |
| `file_mtime` | File modification time. TYPE: `float` |
| `algorithm` | Ranking algorithm used. TYPE: `str`, DEFAULT: `"balanced"` |

| RETURNS | DESCRIPTION |
|---|---|
| `Optional[Dict[str, Any]]` | Cached score data or None |
Source code in tenets/utils/cache.py
```python
def get(
    self,
    file_path: Path,
    prompt_hash: str,
    file_mtime: float,
    algorithm: str = "balanced",
) -> Optional[Dict[str, Any]]:
    """Get cached ranking score.

    Args:
        file_path: Path to file
        prompt_hash: Hash of the prompt
        file_mtime: File modification time
        algorithm: Ranking algorithm used

    Returns:
        Cached score data or None
    """
    key = cache_key(str(file_path), prompt_hash, algorithm)
    entry = self._cache.get(key)

    if entry is None:
        return None

    # Check if file was modified since caching
    if entry.get("mtime", 0) < file_mtime:
        self._cache.delete(key)
        return None

    return entry
```
set¶
set(file_path: Path, prompt_hash: str, file_mtime: float, score: float, factors: Dict[str, float], algorithm: str = 'balanced') -> None
Cache a ranking score.
| PARAMETER | DESCRIPTION |
|---|---|
| `file_path` | Path to file. TYPE: `Path` |
| `prompt_hash` | Hash of the prompt. TYPE: `str` |
| `file_mtime` | File modification time. TYPE: `float` |
| `score` | Computed relevance score. TYPE: `float` |
| `factors` | Individual ranking factors. TYPE: `Dict[str, float]` |
| `algorithm` | Ranking algorithm used. TYPE: `str`, DEFAULT: `"balanced"` |
Source code in tenets/utils/cache.py
```python
def set(
    self,
    file_path: Path,
    prompt_hash: str,
    file_mtime: float,
    score: float,
    factors: Dict[str, float],
    algorithm: str = "balanced",
) -> None:
    """Cache a ranking score.

    Args:
        file_path: Path to file
        prompt_hash: Hash of the prompt
        file_mtime: File modification time
        score: Computed relevance score
        factors: Individual ranking factors
        algorithm: Ranking algorithm used
    """
    key = cache_key(str(file_path), prompt_hash, algorithm)
    self._cache.set(
        key,
        {
            "score": score,
            "factors": factors,
            "mtime": file_mtime,
            "algorithm": algorithm,
        },
    )
```
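Example (illustrative): caching and re-reading a score. The prompt hash is built with `cache_key` (any stable string hash would do) and the mtime values are stand-ins.

```python
from pathlib import Path

from tenets.utils.cache import RankingScoreCache, cache_key

rcache = RankingScoreCache()
path = Path("src/module.py")
prompt_hash = cache_key("find the auth bug")  # stand-in prompt hash
mtime = 1_700_000_000.0                       # stand-in file mtime

rcache.set(path, prompt_hash, mtime, score=0.87,
           factors={"keyword": 0.9, "path": 0.8})

entry = rcache.get(path, prompt_hash, mtime)      # {"score": 0.87, ...}
stale = rcache.get(path, prompt_hash, mtime + 1)  # newer file -> None, entry dropped
```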
stats¶
Functions¶
cache_key¶
Generate a cache key from arguments.
Creates a deterministic hash from the provided arguments that can be used as a cache key.
| PARAMETER | DESCRIPTION |
|---|---|
| `*args` | Positional arguments to include in key. TYPE: `Any` |
| `**kwargs` | Keyword arguments to include in key. TYPE: `Any` |

| RETURNS | DESCRIPTION |
|---|---|
| `str` | A hexadecimal hash string |
Source code in tenets/utils/cache.py
```python
def cache_key(*args: Any, **kwargs: Any) -> str:
    """Generate a cache key from arguments.

    Creates a deterministic hash from the provided arguments that can be
    used as a cache key.

    Args:
        *args: Positional arguments to include in key
        **kwargs: Keyword arguments to include in key

    Returns:
        A hexadecimal hash string
    """
    # Serialize arguments to JSON for consistent hashing
    key_data = {
        "args": [_serialize_arg(a) for a in args],
        "kwargs": {k: _serialize_arg(v) for k, v in sorted(kwargs.items())},
    }
    key_str = json.dumps(key_data, sort_keys=True, default=str)

    return hashlib.sha256(key_str.encode()).hexdigest()[:16]
```
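Example (illustrative): two properties visible in the source above, that keyword arguments are sorted before hashing (so their order does not matter) and that the digest is truncated to 16 hex characters.

```python
from tenets.utils.cache import cache_key

k1 = cache_key("file.py", algorithm="balanced", model="default")
k2 = cache_key("file.py", model="default", algorithm="balanced")

assert k1 == k2        # kwargs are sorted, so ordering is irrelevant
assert len(k1) == 16   # truncated SHA-256 hex digest
```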
get_file_cache¶
Get or create the global file content cache.
get_embedding_cache¶
Get or create the global embedding cache.
get_ranking_cache¶
Get or create the global ranking score cache.
clear_all_caches¶
Clear all global caches.
| RETURNS | DESCRIPTION |
|---|---|
| `Dict[str, int]` | Dictionary with count of cleared entries per cache |
Source code in tenets/utils/cache.py
```python
def clear_all_caches() -> Dict[str, int]:
    """Clear all global caches.

    Returns:
        Dictionary with count of cleared entries per cache
    """
    global _file_cache, _embedding_cache, _ranking_cache

    with _cache_lock:
        results = {}

        if _file_cache is not None:
            results["file_content"] = _file_cache.clear()

        if _embedding_cache is not None:
            results["embeddings"] = _embedding_cache.clear()

        if _ranking_cache is not None:
            results["ranking_scores"] = _ranking_cache.clear()

        return results
```
get_all_cache_stats¶
Get statistics for all caches.
| RETURNS | DESCRIPTION |
|---|---|
| `Dict[str, Dict[str, Any]]` | Dictionary with stats for each cache type |
Source code in tenets/utils/cache.py
```python
def get_all_cache_stats() -> Dict[str, Dict[str, Any]]:
    """Get statistics for all caches.

    Returns:
        Dictionary with stats for each cache type
    """
    stats = {}

    file_cache = get_file_cache()
    stats["file_content"] = file_cache.stats()

    embedding_cache = get_embedding_cache()
    stats["embeddings"] = embedding_cache.stats()

    ranking_cache = get_ranking_cache()
    stats["ranking_scores"] = ranking_cache.stats()

    return stats
```
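Example (illustrative): putting the module-level helpers together. This assumes the per-class `stats()` wrappers (collapsed above) delegate to `LRUCache.stats()` and therefore expose the same keys.

```python
from tenets.utils.cache import clear_all_caches, get_all_cache_stats

for name, s in get_all_cache_stats().items():
    print(f"{name}: {s['size']}/{s['max_size']} entries, "
          f"hit_rate={s['hit_rate']:.2f}")

cleared = clear_all_caches()  # e.g. {"file_content": 3, "embeddings": 12, ...}
```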