strategies¶
Full name: tenets.core.ranking.strategies
Ranking strategies for different use cases.
This module implements ranking strategies that range from simple keyword matching to ML-based semantic analysis. Each strategy offers a different trade-off between speed and accuracy.
All text processing and pattern matching is delegated to centralized NLP components, so programming-pattern and keyword-extraction logic is no longer duplicated across strategies.
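A minimal usage sketch, assuming files have already been analyzed and that callers combine each strategy's RankingFactors with its get_weights() via a weighted sum; weighted_score below is a hypothetical helper, since the combining step is not part of this module:

from tenets.core.ranking.strategies import BalancedRankingStrategy

def weighted_score(factors, weights):
    # Hypothetical combiner: weighted sum over the factor attributes
    # named by the weights dict; missing attributes count as 0.0.
    return sum(w * getattr(factors, name, 0.0) for name, w in weights.items())

strategy = BalancedRankingStrategy()
ranked = sorted(
    analyzed_files,  # FileAnalysis objects produced by the analyzer
    key=lambda f: weighted_score(
        strategy.rank_file(f, prompt_context, corpus_stats), strategy.get_weights()
    ),
    reverse=True,
)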
Classes¶
RankingStrategy¶
Base class defining the interface (rank_file and get_weights) that the concrete strategies below implement.
FastRankingStrategy¶
Bases: RankingStrategy
Fast keyword-based ranking strategy.
Initialize fast ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Fast ranking based on keywords and paths.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Fast ranking based on keywords and paths."""
    factors = RankingFactors()

    # Keyword matching with position weighting
    factors.keyword_match = self._calculate_keyword_score(file, prompt_context.keywords)

    # Path relevance
    factors.path_relevance = self._calculate_path_relevance(file.path, prompt_context)

    # File type relevance
    factors.type_relevance = self._calculate_type_relevance(file, prompt_context)

    # Basic git info if available
    if hasattr(file, "git_info") and file.git_info:
        factors.git_recency = self._calculate_simple_git_recency(file.git_info)

    return factors
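_calculate_keyword_score is collapsed on this page; since the comment above mentions position weighting, here is a minimal sketch of what such a helper could look like, with hypothetical constants and cutoffs:

def _calculate_keyword_score(self, file, keywords):
    # Hypothetical sketch: matches near the top of a file (imports,
    # module docstring, class names) weigh more than matches deep in
    # the body; all constants are illustrative.
    content = (file.content or "").lower()
    if not content or not keywords:
        return 0.0
    head, body = content[:2000], content[2000:]
    score = 0.0
    for kw in keywords:
        kw = kw.lower()
        score += 1.0 * head.count(kw) + 0.5 * body.count(kw)
    # Soft cap to keep the factor in [0, 1]
    return min(1.0, score / (5.0 * len(keywords)))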
get_weights¶
Get weights for fast ranking.
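The source for this method is collapsed on this page. Purely to illustrate its shape, a fast profile would concentrate weight on the cheap signals computed in rank_file above; the values below are hypothetical, not the real ones from strategies.py:

def get_weights(self) -> Dict[str, float]:
    """Get weights for fast ranking."""
    # Hypothetical values for illustration; see strategies.py for the
    # actual fast-profile weights.
    return {
        "keyword_match": 0.5,
        "path_relevance": 0.3,
        "type_relevance": 0.1,
        "git_recency": 0.1,
    }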
BalancedRankingStrategy¶
Bases: RankingStrategy
Balanced multi-factor ranking strategy.
Initialize balanced ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Balanced ranking using multiple factors.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Balanced ranking using multiple factors."""
    factors = RankingFactors()

    # Enhanced keyword matching
    factors.keyword_match = self._calculate_enhanced_keyword_score(
        file, prompt_context.keywords
    )

    # TF-IDF similarity
    if corpus_stats.get("tfidf_calculator"):
        tfidf_calc = corpus_stats["tfidf_calculator"]
        if file.path in tfidf_calc.document_vectors:
            factors.tfidf_similarity = tfidf_calc.compute_similarity(
                prompt_context.text, file.path
            )

    # BM25 score
    if corpus_stats.get("bm25_calculator"):
        bm25_calc = corpus_stats["bm25_calculator"]
        query_tokens = bm25_calc.tokenize(prompt_context.text)
        factors.bm25_score = min(1.0, bm25_calc.score_document(query_tokens, file.path) / 10)

    # Path structure analysis
    factors.path_relevance = self._analyze_path_structure(file.path, prompt_context)

    # Import centrality
    if corpus_stats.get("import_graph"):
        factors.import_centrality = self._calculate_import_centrality(
            file, corpus_stats["import_graph"]
        )

    # Git activity
    if hasattr(file, "git_info") and file.git_info:
        factors.git_recency = self._calculate_git_recency(file.git_info)
        factors.git_frequency = self._calculate_git_frequency(file.git_info)

    # Complexity relevance
    if file.complexity:
        factors.complexity_relevance = self._calculate_complexity_relevance(
            file.complexity, prompt_context
        )

    # File type relevance
    factors.type_relevance = self._calculate_type_relevance(file, prompt_context)

    return factors
get_weights¶
Get weights for balanced ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for balanced ranking."""
    return {
        "keyword_match": 0.20,
        "bm25_score": 0.25,  # BM25 prioritized for better ranking
        "tfidf_similarity": 0.10,  # TF-IDF as supplementary signal
        "path_relevance": 0.15,
        "import_centrality": 0.10,
        "git_recency": 0.05,
        "git_frequency": 0.05,
        "complexity_relevance": 0.05,
        "type_relevance": 0.05,
    }
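These weights sum to 1.0, so a weighted sum of factor values in [0, 1] yields a final score in [0, 1]. A worked example using the hypothetical weighted_score helper sketched at the top of this page:

factors = strategy.rank_file(file, prompt_context, corpus_stats)
# Suppose keyword_match=0.8, bm25_score=0.6, path_relevance=0.4 and all
# other factors are 0.0; the combined score is then
#   0.20*0.8 + 0.25*0.6 + 0.15*0.4 = 0.16 + 0.15 + 0.06 = 0.37
score = weighted_score(factors, strategy.get_weights())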
ThoroughRankingStrategy¶
Bases: RankingStrategy
Thorough ranking strategy that performs deep analysis using centralized NLP components.
Initialize thorough ranking strategy with NLP components.
Source code in tenets/core/ranking/strategies.py
def __init__(self):
    """Initialize thorough ranking strategy with NLP components."""
    from tenets.utils.logger import get_logger

    self.logger = get_logger(__name__)

    # Get centralized programming patterns
    self.programming_patterns = get_programming_patterns()

    # Optional embedding model for semantic similarity; tests patch the
    # constructor in ranker module, so import from there.
    try:  # pragma: no cover - optional dependency
        from .ranker import SentenceTransformer as _ST
        from .ranker import cosine_similarity as _cos

        self._cosine_similarity = _cos
        if _ST is not None:
            # Tests expect this exact constructor call
            self._embedding_model = _ST("all-MiniLM-L6-v2")
        else:
            self._embedding_model = None
    except Exception:
        self._embedding_model = None

        # Fallback simple cosine if import failed
        def _fallback_cos(a, b):
            try:
                def to_vec(x):
                    try:
                        if hasattr(x, "detach"):
                            x = x.detach()
                        if hasattr(x, "flatten"):
                            x = x.flatten()
                        if hasattr(x, "tolist"):
                            x = x.tolist()
                    except Exception:
                        pass

                    def flatten(seq):
                        for item in seq:
                            if isinstance(item, (list, tuple)):
                                yield from flatten(item)
                            else:
                                try:
                                    yield float(item)
                                except Exception:
                                    yield 0.0

                    if isinstance(x, (list, tuple)):
                        return list(flatten(x))
                    try:
                        return [float(x)]
                    except Exception:
                        return [0.0]

                va = to_vec(a)
                vb = to_vec(b)
                n = min(len(va), len(vb))
                if n == 0:
                    return 0.0
                va = va[:n]
                vb = vb[:n]
                dot = sum(va[i] * vb[i] for i in range(n))
                norm_a = math.sqrt(sum(v * v for v in va)) or 1.0
                norm_b = math.sqrt(sum(v * v for v in vb)) or 1.0
                return float(dot / (norm_a * norm_b))
            except Exception:
                return 0.0

        self._cosine_similarity = _fallback_cos
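The fallback cosine accepts plain Python lists as well as tensor-like objects (anything exposing detach/flatten/tolist), so the strategy degrades gracefully when sentence-transformers is unavailable. A quick sanity check of the pure-Python path, assuming the ranker import failed and the fallback is in use:

strategy = ThoroughRankingStrategy()
# Identical vectors give cosine 1.0; orthogonal vectors give 0.0.
assert abs(strategy._cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
assert abs(strategy._cosine_similarity([1.0, 0.0], [0.0, 1.0])) < 1e-9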
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
Thorough ranking with deep analysis using centralized NLP.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """Thorough ranking with deep analysis using centralized NLP."""
    # Start with balanced ranking
    balanced = BalancedRankingStrategy()
    factors = balanced.rank_file(file, prompt_context, corpus_stats)

    # Add deep code pattern analysis using centralized patterns
    pattern_scores = self.programming_patterns.analyze_code_patterns(
        file.content or "", prompt_context.keywords
    )

    # Store overall score
    factors.code_patterns = pattern_scores.get("overall", 0.0)

    # Store individual category scores with clean naming
    for category, score in pattern_scores.items():
        if category != "overall":
            # Use consistent naming: category_patterns
            factors.custom_scores[f"{category}_patterns"] = score

    # AST-based analysis
    if file.structure:
        ast_scores = self._analyze_ast_relevance(file, prompt_context)
        factors.ast_relevance = ast_scores.get("overall", 0.0)
        factors.custom_scores.update(ast_scores)

    # Documentation analysis
    factors.documentation_score = self._analyze_documentation(file)

    # Test coverage relevance
    if prompt_context.task_type == "test":
        factors.test_coverage = self._analyze_test_coverage(file)

    # Dependency depth
    if corpus_stats.get("dependency_tree"):
        factors.dependency_depth = self._calculate_dependency_depth(
            file, corpus_stats["dependency_tree"]
        )

    # Author relevance (if specific authors mentioned)
    if hasattr(file, "git_info") and file.git_info:
        factors.git_author_relevance = self._calculate_author_relevance(
            file.git_info, prompt_context
        )

    # Semantic similarity (lightweight embedding-based) if model available
    try:
        if self._embedding_model and file.content and prompt_context.text:
            # Typical usage encodes to tensor; tests provide a mock with unsqueeze
            f_emb = self._embedding_model.encode(file.content, convert_to_tensor=True)
            if hasattr(f_emb, "unsqueeze"):
                f_emb = f_emb.unsqueeze(0)
            p_emb = self._embedding_model.encode(prompt_context.text, convert_to_tensor=True)
            if hasattr(p_emb, "unsqueeze"):
                p_emb = p_emb.unsqueeze(0)
            sim = self._cosine_similarity(f_emb, p_emb)
            # Handle numpy/tensor scalars with .item()
            if hasattr(sim, "item") and callable(sim.item):
                sim = sim.item()
            factors.semantic_similarity = float(sim) if sim is not None else 0.0
    except Exception:
        # Be resilient if ML pieces aren't available
        pass

    return factors
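The unsqueeze(0) calls reshape a 1-D embedding of shape (d,) into the (1, d) batch shape that cosine-similarity utilities expect. A minimal sketch of the same computation directly against sentence-transformers, assuming it is installed (all-MiniLM-L6-v2 produces 384-dimensional embeddings):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
f_emb = model.encode("def login(user): ...", convert_to_tensor=True)  # shape (384,)
p_emb = model.encode("fix the login bug", convert_to_tensor=True)
sim = util.cos_sim(f_emb.unsqueeze(0), p_emb.unsqueeze(0))  # shape (1, 1)
print(float(sim.item()))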
get_weights¶
Get weights for thorough ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for thorough ranking."""
    return {
        "keyword_match": 0.15,
        "tfidf_similarity": 0.15,
        "bm25_score": 0.10,
        "path_relevance": 0.10,
        "import_centrality": 0.10,
        "git_recency": 0.05,
        "git_frequency": 0.05,
        "complexity_relevance": 0.05,
        "type_relevance": 0.05,
        "code_patterns": 0.10,
        "ast_relevance": 0.05,
        "documentation_score": 0.03,
        "git_author_relevance": 0.02,
    }
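As with the balanced strategy, these weights sum to 1.00 (0.15 + 0.15 + 3 x 0.10 + 4 x 0.05 + 0.10 + 0.05 + 0.03 + 0.02), keeping scores comparable across strategies. A quick check:

for cls in (BalancedRankingStrategy, ThoroughRankingStrategy):
    assert abs(sum(cls().get_weights().values()) - 1.0) < 1e-9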
MLRankingStrategy¶
Bases: RankingStrategy
Machine Learning-based ranking strategy.
Initialize ML ranking strategy.
Source code in tenets/core/ranking/strategies.py
Functions¶
rank_file¶
rank_file(file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]) -> RankingFactors
ML-based ranking with semantic similarity.
Source code in tenets/core/ranking/strategies.py
def rank_file(
    self, file: FileAnalysis, prompt_context: PromptContext, corpus_stats: Dict[str, Any]
) -> RankingFactors:
    """ML-based ranking with semantic similarity."""
    # Load model lazily on first use
    if not self._model_loaded:
        self._load_model()
        self._model_loaded = True

    # Start with thorough ranking
    thorough = ThoroughRankingStrategy()
    factors = thorough.rank_file(file, prompt_context, corpus_stats)

    # Add semantic similarity if model is available
    if self._model and file.content:
        factors.semantic_similarity = self._calculate_semantic_similarity(
            file.content, prompt_context.text
        )

        # Boost other factors based on semantic similarity
        if factors.semantic_similarity > 0.7:
            factors.keyword_match *= 1.2
            factors.path_relevance *= 1.1

    return factors
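_load_model and _calculate_semantic_similarity are not reproduced on this page. A plausible sketch consistent with the calls above, hypothetical and assuming sentence-transformers (the real implementations live in strategies.py):

def _load_model(self):
    # Hypothetical sketch: load the embedding model, or degrade
    # gracefully so get_weights() falls back to thorough weights.
    try:
        from sentence_transformers import SentenceTransformer
        self._model = SentenceTransformer("all-MiniLM-L6-v2")
    except Exception:
        self._model = None

def _calculate_semantic_similarity(self, content: str, query: str) -> float:
    from sentence_transformers import util
    embeddings = self._model.encode([content, query], convert_to_tensor=True)
    return float(util.cos_sim(embeddings[0], embeddings[1]).item())

Note that the similarity boost can push keyword_match past 1.0 (for example, 0.9 * 1.2 = 1.08); any clamping is left to the score combiner.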
get_weights¶
Get weights for ML ranking.
Source code in tenets/core/ranking/strategies.py
def get_weights(self) -> Dict[str, float]:
    """Get weights for ML ranking."""
    if self._model:
        return {
            "semantic_similarity": 0.35,
            "keyword_match": 0.10,
            "tfidf_similarity": 0.10,
            "bm25_score": 0.10,
            "path_relevance": 0.10,
            "import_centrality": 0.05,
            "code_patterns": 0.10,
            "ast_relevance": 0.05,
            "git_recency": 0.025,
            "git_frequency": 0.025,
        }
    else:
        # Fallback to thorough weights if ML not available
        return ThoroughRankingStrategy().get_weights()
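A hedged closing example of choosing a strategy by speed/accuracy budget; the mapping and helper below are illustrative, not an API exported by this module:

from tenets.core.ranking.strategies import (
    BalancedRankingStrategy,
    FastRankingStrategy,
    MLRankingStrategy,
    ThoroughRankingStrategy,
)

STRATEGIES = {
    "fast": FastRankingStrategy,          # keywords and paths only
    "balanced": BalancedRankingStrategy,  # adds TF-IDF, BM25, git, imports
    "thorough": ThoroughRankingStrategy,  # adds patterns, AST, docs
    "ml": MLRankingStrategy,              # adds embedding similarity
}

def make_strategy(name: str = "balanced"):
    # Illustrative factory; unknown names fall back to balanced.
    return STRATEGIES.get(name, BalancedRankingStrategy)()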