strategies¶
Full name: tenets.core.ranking.strategies
Ranking strategies for different use cases.
This module implements ranking strategies that range from simple keyword matching to ML-based semantic analysis. Each strategy offers a different trade-off between speed and accuracy.
All text processing and pattern matching is delegated to centralized NLP components, so programming-pattern and keyword-extraction logic is no longer duplicated across strategies.
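A minimal usage sketch, assuming files have already been analyzed and that callers combine each strategy's RankingFactors with its get_weights() via a weighted sum; weighted_score below is a hypothetical helper, since the combining step is not part of this module:

from tenets.core.ranking.strategies import BalancedRankingStrategy

def weighted_score(factors, weights):
    # Hypothetical combiner: weighted sum over the factor attributes
    # named by the weights dict; missing attributes count as 0.0.
    return sum(w * getattr(factors, name, 0.0) for name, w in weights.items())

strategy = BalancedRankingStrategy()
ranked = sorted(
    analyzed_files,  # FileAnalysis objects produced by the analyzer
    key=lambda f: weighted_score(
        strategy.rank_file(f, prompt_context, corpus_stats), strategy.get_weights()
    ),
    reverse=True,
)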
Classes¶
RankingStrategy¶
Base class defining the interface (rank_file and get_weights) that the concrete strategies below implement.
FastRankingStrategy¶
Bases: RankingStrategy
Fast keyword-based ranking strategy.
Initialize fast ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Fast ranking based on keywords and paths.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Fast ranking based on keywords and paths."""
    factors = RankingFactors()

    # Keyword matching with position weighting
    factors.keyword_match = self._calculate_keyword_score(file, prompt_context.keywords)

    # Path relevance
    factors.path_relevance = self._calculate_path_relevance(file.path, prompt_context)

    # File type relevance
    factors.type_relevance = self._calculate_type_relevance(file, prompt_context)

    # Basic git info if available
    if hasattr(file, "git_info") and file.git_info:
        factors.git_recency = self._calculate_simple_git_recency(file.git_info)

    return factors
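_calculate_keyword_score is collapsed on this page; since the comment above mentions position weighting, here is a minimal sketch of what such a helper could look like, with hypothetical constants and cutoffs:

def _calculate_keyword_score(self, file, keywords):
    # Hypothetical sketch: matches near the top of a file (imports,
    # module docstring, class names) weigh more than matches deep in
    # the body; all constants are illustrative.
    content = (file.content or "").lower()
    if not content or not keywords:
        return 0.0
    head, body = content[:2000], content[2000:]
    score = 0.0
    for kw in keywords:
        kw = kw.lower()
        score += 1.0 * head.count(kw) + 0.5 * body.count(kw)
    # Soft cap to keep the factor in [0, 1]
    return min(1.0, score / (5.0 * len(keywords)))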
get_weights¶
Get weights for fast ranking.
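The source for this method is collapsed on this page. Purely to illustrate its shape, a fast profile would concentrate weight on the cheap signals computed in rank_file above; the values below are hypothetical, not the real ones from strategies.py:

def get_weights(self) -> Dict[str, float]:
    """Get weights for fast ranking."""
    # Hypothetical values for illustration; see strategies.py for the
    # actual fast-profile weights.
    return {
        "keyword_match": 0.5,
        "path_relevance": 0.3,
        "type_relevance": 0.1,
        "git_recency": 0.1,
    }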
BalancedRankingStrategy¶
Bases: RankingStrategy
Balanced multi-factor ranking strategy.
Initialize balanced ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Balanced ranking using multiple factors.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Balanced ranking using multiple factors."""
    factors = RankingFactors()

    # Enhanced keyword matching
    factors.keyword_match = self._calculate_enhanced_keyword_score(
        file, prompt_context.keywords
    )

    # TF-IDF similarity
    if corpus_stats.get("tfidf_calculator"):
        tfidf_calc = corpus_stats["tfidf_calculator"]
        if file.path in tfidf_calc.document_vectors:
            factors.tfidf_similarity = tfidf_calc.compute_similarity(
                prompt_context.text, file.path
            )

    # BM25 score
    if corpus_stats.get("bm25_calculator"):
        bm25_calc = corpus_stats["bm25_calculator"]
        query_tokens = bm25_calc.tokenize(prompt_context.text)
        factors.bm25_score = min(1.0, bm25_calc.score_document(query_tokens, file.path) / 10)

    # Path structure analysis
    factors.path_relevance = self._analyze_path_structure(file.path, prompt_context)

    # Import centrality
    if corpus_stats.get("import_graph"):
        factors.import_centrality = self._calculate_import_centrality(
            file, corpus_stats["import_graph"]
        )

    # Git activity
    if hasattr(file, "git_info") and file.git_info:
        factors.git_recency = self._calculate_git_recency(file.git_info)
        factors.git_frequency = self._calculate_git_frequency(file.git_info)

    # Complexity relevance
    if file.complexity:
        factors.complexity_relevance = self._calculate_complexity_relevance(
            file.complexity, prompt_context
        )

    # File type relevance
    factors.type_relevance = self._calculate_type_relevance(file, prompt_context)

    return factors
get_weights¶
Get weights for balanced ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for balanced ranking."""
    return {
        "keyword_match": 0.20,
        "bm25_score": 0.25,  # BM25 prioritized for better ranking
        "tfidf_similarity": 0.10,  # TF-IDF as supplementary signal
        "path_relevance": 0.15,
        "import_centrality": 0.10,
        "git_recency": 0.05,
        "git_frequency": 0.05,
        "complexity_relevance": 0.05,
        "type_relevance": 0.05,
    }
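These weights sum to 1.0, so a weighted sum of factor values in [0, 1] yields a final score in [0, 1]. A worked example using the hypothetical weighted_score helper sketched at the top of this page:

factors = strategy.rank_file(file, prompt_context, corpus_stats)
# Suppose keyword_match=0.8, bm25_score=0.6, path_relevance=0.4 and all
# other factors are 0.0; the combined score is then
#   0.20*0.8 + 0.25*0.6 + 0.15*0.4 = 0.16 + 0.15 + 0.06 = 0.37
score = weighted_score(factors, strategy.get_weights())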
ThoroughRankingStrategy¶
Bases: RankingStrategy
Thorough ranking strategy that performs deep analysis using centralized NLP components.
Initialize thorough ranking strategy with NLP components.
Source code in tenets/core/ranking/strategies.py
def __init__(self):
    """Initialize thorough ranking strategy with NLP components."""
    from tenets.utils.logger import get_logger

    self.logger = get_logger(__name__)

    # Get centralized programming patterns
    self.programming_patterns = get_programming_patterns()

    # Optional embedding model for semantic similarity; tests patch the
    # constructor in ranker module, so import from there.
    try:  # pragma: no cover - optional dependency
        from .ranker import SentenceTransformer as _ST
        from .ranker import cosine_similarity as _cos

        self._cosine_similarity = _cos
        if _ST is not None:
            # Tests expect this exact constructor call
            self._embedding_model = _ST("all-MiniLM-L6-v2")
        else:
            self._embedding_model = None
    except Exception:
        self._embedding_model = None

        # Fallback simple cosine if import failed
        def _fallback_cos(a, b):
            try:
                def to_vec(x):
                    try:
                        if hasattr(x, "detach"):
                            x = x.detach()
                        if hasattr(x, "flatten"):
                            x = x.flatten()
                        if hasattr(x, "tolist"):
                            x = x.tolist()
                    except Exception:
                        pass

                    def flatten(seq):
                        for item in seq:
                            if isinstance(item, (list, tuple)):
                                yield from flatten(item)
                            else:
                                try:
                                    yield float(item)
                                except Exception:
                                    yield 0.0

                    if isinstance(x, (list, tuple)):
                        return list(flatten(x))
                    try:
                        return [float(x)]
                    except Exception:
                        return [0.0]

                va = to_vec(a)
                vb = to_vec(b)
                n = min(len(va), len(vb))
                if n == 0:
                    return 0.0
                va = va[:n]
                vb = vb[:n]
                dot = sum(va[i] * vb[i] for i in range(n))
                norm_a = math.sqrt(sum(v * v for v in va)) or 1.0
                norm_b = math.sqrt(sum(v * v for v in vb)) or 1.0
                return float(dot / (norm_a * norm_b))
            except Exception:
                return 0.0

        self._cosine_similarity = _fallback_cos
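The fallback cosine accepts plain Python lists as well as tensor-like objects (anything exposing detach/flatten/tolist), so the strategy degrades gracefully when sentence-transformers is unavailable. A quick sanity check of the pure-Python path, assuming the ranker import failed and the fallback is in use:

strategy = ThoroughRankingStrategy()
# Identical vectors give cosine 1.0; orthogonal vectors give 0.0.
assert abs(strategy._cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
assert abs(strategy._cosine_similarity([1.0, 0.0], [0.0, 1.0])) < 1e-9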
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Thorough ranking with deep analysis using centralized NLP.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Thorough ranking with deep analysis using centralized NLP."""
    # Start with balanced ranking
    balanced = BalancedRankingStrategy()
    factors = balanced.rank_file(file, prompt_context, corpus_stats)

    # Add deep code pattern analysis using centralized patterns
    pattern_scores = self.programming_patterns.analyze_code_patterns(
        file.content or "", prompt_context.keywords
    )

    # Store overall score
    factors.code_patterns = pattern_scores.get("overall", 0.0)

    # Store individual category scores with clean naming
    for category, score in pattern_scores.items():
        if category != "overall":
            # Use consistent naming: category_patterns
            factors.custom_scores[f"{category}_patterns"] = score

    # AST-based analysis
    if file.structure:
        ast_scores = self._analyze_ast_relevance(file, prompt_context)
        factors.ast_relevance = ast_scores.get("overall", 0.0)
        factors.custom_scores.update(ast_scores)

    # Documentation analysis
    factors.documentation_score = self._analyze_documentation(file)

    # Test coverage relevance
    if prompt_context.task_type == "test":
        factors.test_coverage = self._analyze_test_coverage(file)

    # Dependency depth
    if corpus_stats.get("dependency_tree"):
        factors.dependency_depth = self._calculate_dependency_depth(
            file, corpus_stats["dependency_tree"]
        )

    # Author relevance (if specific authors mentioned)
    if hasattr(file, "git_info") and file.git_info:
        factors.git_author_relevance = self._calculate_author_relevance(
            file.git_info, prompt_context
        )

    # Semantic similarity (lightweight embedding-based) if model available
    try:
        if self._embedding_model and file.content and prompt_context.text:
            # Typical usage encodes to tensor; tests provide a mock with unsqueeze
            f_emb = self._embedding_model.encode(file.content, convert_to_tensor=True)
            if hasattr(f_emb, "unsqueeze"):
                f_emb = f_emb.unsqueeze(0)
            p_emb = self._embedding_model.encode(prompt_context.text, convert_to_tensor=True)
            if hasattr(p_emb, "unsqueeze"):
                p_emb = p_emb.unsqueeze(0)
            sim = self._cosine_similarity(f_emb, p_emb)
            # Handle numpy/tensor scalars with .item()
            if hasattr(sim, "item") and callable(sim.item):
                sim = sim.item()
            factors.semantic_similarity = float(sim) if sim is not None else 0.0
    except Exception:
        # Be resilient if ML pieces aren't available
        pass

    return factors
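The unsqueeze(0) calls reshape a 1-D embedding of shape (d,) into the (1, d) batch shape that cosine-similarity utilities expect. A minimal sketch of the same computation directly against sentence-transformers, assuming it is installed (all-MiniLM-L6-v2 produces 384-dimensional embeddings):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
f_emb = model.encode("def login(user): ...", convert_to_tensor=True)  # shape (384,)
p_emb = model.encode("fix the login bug", convert_to_tensor=True)
sim = util.cos_sim(f_emb.unsqueeze(0), p_emb.unsqueeze(0))  # shape (1, 1)
print(float(sim.item()))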
get_weights¶
Get weights for thorough ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for thorough ranking."""
    return {
        "keyword_match": 0.15,
        "tfidf_similarity": 0.15,
        "bm25_score": 0.10,
        "path_relevance": 0.10,
        "import_centrality": 0.10,
        "git_recency": 0.05,
        "git_frequency": 0.05,
        "complexity_relevance": 0.05,
        "type_relevance": 0.05,
        "code_patterns": 0.10,
        "ast_relevance": 0.05,
        "documentation_score": 0.03,
        "git_author_relevance": 0.02,
    }
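As with the balanced strategy, these weights sum to 1.00 (0.15 + 0.15 + 3 x 0.10 + 4 x 0.05 + 0.10 + 0.05 + 0.03 + 0.02), keeping scores comparable across strategies. A quick check:

for cls in (BalancedRankingStrategy, ThoroughRankingStrategy):
    assert abs(sum(cls().get_weights().values()) - 1.0) < 1e-9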
MLRankingStrategy¶
Bases: RankingStrategy
Machine Learning-based ranking strategy.
Initialize ML ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
ML-based ranking with semantic similarity.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """ML-based ranking with semantic similarity."""
    # Load model lazily on first use
    if not self._model_loaded:
        self._load_model()
        self._model_loaded = True

    # Start with thorough ranking
    thorough = ThoroughRankingStrategy()
    factors = thorough.rank_file(file, prompt_context, corpus_stats)

    # Add semantic similarity if model is available
    if self._model and file.content:
        factors.semantic_similarity = self._calculate_semantic_similarity(
            file.content, prompt_context.text
        )

        # Boost other factors based on semantic similarity
        if factors.semantic_similarity > 0.7:
            factors.keyword_match *= 1.2
            factors.path_relevance *= 1.1

    return factors
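_load_model and _calculate_semantic_similarity are not reproduced on this page. A plausible sketch consistent with the calls above, hypothetical and assuming sentence-transformers (the real implementations live in strategies.py):

def _load_model(self):
    # Hypothetical sketch: load the embedding model, or degrade
    # gracefully so get_weights() falls back to thorough weights.
    try:
        from sentence_transformers import SentenceTransformer
        self._model = SentenceTransformer("all-MiniLM-L6-v2")
    except Exception:
        self._model = None

def _calculate_semantic_similarity(self, content: str, query: str) -> float:
    from sentence_transformers import util
    embeddings = self._model.encode([content, query], convert_to_tensor=True)
    return float(util.cos_sim(embeddings[0], embeddings[1]).item())

Note that the similarity boost can push keyword_match past 1.0 (for example, 0.9 * 1.2 = 1.08); any clamping is left to the score combiner.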
get_weights¶
Get weights for ML ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for ML ranking."""
    if self._model:
        return {
            "semantic_similarity": 0.35,
            "keyword_match": 0.10,
            "tfidf_similarity": 0.10,
            "bm25_score": 0.10,
            "path_relevance": 0.10,
            "import_centrality": 0.05,
            "code_patterns": 0.10,
            "ast_relevance": 0.05,
            "git_recency": 0.025,
            "git_frequency": 0.025,
        }
    else:
        # Fallback to thorough weights if ML not available
        return ThoroughRankingStrategy().get_weights()
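A hedged closing example of choosing a strategy by speed/accuracy budget; the mapping and helper below are illustrative, not an API exported by this module:

from tenets.core.ranking.strategies import (
    BalancedRankingStrategy,
    FastRankingStrategy,
    MLRankingStrategy,
    ThoroughRankingStrategy,
)

STRATEGIES = {
    "fast": FastRankingStrategy,          # keywords and paths only
    "balanced": BalancedRankingStrategy,  # adds TF-IDF, BM25, git, imports
    "thorough": ThoroughRankingStrategy,  # adds patterns, AST, docs
    "ml": MLRankingStrategy,              # adds embedding similarity
}

def make_strategy(name: str = "balanced"):
    # Illustrative factory; unknown names fall back to balanced.
    return STRATEGIES.get(name, BalancedRankingStrategy)()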