Skip to content

summarizer_utils

Full name: tenets.core.summarizer.summarizer_utils

summarizer_utils

Shared utilities for summarization.

This module contains common utilities used across different summarizer implementations to avoid code duplication.

Classes

CodeDetector

Unified code detection logic.

Functions
looks_like_code (classmethod)
Python
looks_like_code(text: str, threshold: int = 2) -> bool

Check if text looks like code based on common indicators.

PARAMETERDESCRIPTION
text

Text to check

TYPE:str

threshold

Minimum number of indicators to consider as code

TYPE: int, DEFAULT: 2

RETURNSDESCRIPTION
bool

True if text appears to be code

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@classmethod
def looks_like_code(cls, text: str, threshold: int = 2) -> bool:
    """Decide whether a piece of text resembles source code.

    Only the first ten lines are inspected. Each entry of
    ``cls.CODE_INDICATORS`` that occurs anywhere in that sample counts
    once, regardless of how many times it appears.

    Args:
        text: Text to check
        threshold: Minimum number of distinct indicators that must be
            present for the text to count as code

    Returns:
        True if at least ``threshold`` indicators were found, False
        otherwise (always False for empty text)
    """
    if not text:
        return False

    # Restrict the scan to the head of the text (first 10 lines).
    head = "\n".join(text.split("\n", 10)[:10])

    hits = [marker for marker in cls.CODE_INDICATORS if marker in head]
    return len(hits) >= threshold
detect_language (classmethod)
Python
detect_language(text: str, file_path: Optional[str] = None) -> Optional[str]

Detect programming language from text content.

PARAMETERDESCRIPTION
text

Code text

TYPE:str

file_path

Optional file path for extension-based detection

TYPE: Optional[str], DEFAULT: None

RETURNSDESCRIPTION
Optional[str]

Detected language or None

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@classmethod
def detect_language(cls, text: str, file_path: Optional[str] = None) -> Optional[str]:
    """Guess the programming language of a piece of code.

    The file extension, when recognised, wins outright. Otherwise the
    text is scored against the class-level Python, JavaScript and Java
    indicator lists and the best-scoring language is returned, provided
    it matched at least two indicators.

    Args:
        text: Code text
        file_path: Optional file path for extension-based detection

    Returns:
        Detected language name, or None when nothing is conclusive
    """
    # Extension lookup takes priority over content heuristics.
    if file_path:
        _, dot, tail = file_path.rpartition(".")
        extension = tail.lower() if dot else ""
        by_extension = {
            "py": "python",
            "js": "javascript",
            "ts": "typescript",
            "java": "java",
            "cpp": "cpp",
            "c": "c",
            "cs": "csharp",
            "rb": "ruby",
            "go": "go",
            "rs": "rust",
            "php": "php",
        }
        language = by_extension.get(extension)
        if language is not None:
            return language

    # Content heuristics: JavaScript indicators are matched against the
    # lower-cased text, Python and Java indicators against the raw text.
    lowered = text.lower()
    tallies = {
        "python": len([marker for marker in cls.PYTHON_INDICATORS if marker in text]),
        "javascript": len([marker for marker in cls.JS_INDICATORS if marker in lowered]),
        "java": len([marker for marker in cls.JAVA_INDICATORS if marker in text]),
    }

    # Ties resolve to the first entry in insertion order.
    winner = max(tallies, key=tallies.get)
    return winner if tallies[winner] >= 2 else None

ImportParser

Unified import parsing and detection.

Functions
is_import_line (classmethod)
Python
is_import_line(line: str, language: str = 'python') -> bool

Check if a line is an import statement.

PARAMETERDESCRIPTION
line

Line to check

TYPE:str

language

Programming language

TYPE: str, DEFAULT: 'python'

RETURNSDESCRIPTION
bool

True if line is an import statement

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@classmethod
def is_import_line(cls, line: str, language: str = "python") -> bool:
    """Tell whether a single source line is an import-style statement.

    The line is stripped of surrounding whitespace first. Known
    languages use their own rule; any other language value falls back
    to a generic prefix check.

    Args:
        line: Line to check
        language: Programming language

    Returns:
        True if line is an import statement
    """
    stmt = line.strip()
    if not stmt:
        return False

    if language == "python":
        return stmt.startswith(("import ", "from "))

    if language in ("javascript", "typescript"):
        if stmt.startswith("import ") or "require(" in stmt:
            return True
        # Re-exports such as `export { a } from './a'`.
        return stmt.startswith("export ") and "from" in stmt

    if language == "java":
        return stmt.startswith("import ") and stmt.endswith(";")

    if language == "csharp":
        return stmt.startswith("using ") and stmt.endswith(";")

    if language == "go":
        return stmt.startswith("import ")

    if language == "rust":
        return stmt.startswith("use ")

    if language in ("c", "cpp", "c++"):
        return stmt.startswith("#include")

    # Unknown language: accept any of the common import prefixes.
    generic_prefixes = ("import ", "from ", "using ", "#include", "use ")
    return stmt.startswith(generic_prefixes)
extract_imports (classmethod)
Python
extract_imports(text: str, language: str = 'python') -> List[str]

Extract all import statements from code.

PARAMETERDESCRIPTION
text

Source code text

TYPE:str

language

Programming language

TYPE: str, DEFAULT: 'python'

RETURNSDESCRIPTION
List[str]

List of import statements

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@classmethod
def extract_imports(cls, text: str, language: str = "python") -> List[str]:
    """Collect the import statements found in a piece of source code.

    When ``cls.IMPORT_PATTERNS`` has an entry for the language, every
    pattern in that entry is run over the whole text with ``findall``
    and the matches are gathered in pattern order. Otherwise the text
    is scanned line by line using ``cls.is_import_line``.

    Args:
        text: Source code text
        language: Programming language

    Returns:
        List of import statements
    """
    if language not in cls.IMPORT_PATTERNS:
        # No patterns registered for this language: fall back to the
        # per-line check and keep the stripped line text.
        return [
            raw.strip()
            for raw in text.split("\n")
            if cls.is_import_line(raw, language)
        ]

    found = []
    for regex in cls.IMPORT_PATTERNS[language]:
        found += regex.findall(text)
    return found
summarize_imports (classmethod)
Python
summarize_imports(imports: List[str], threshold: int = 5) -> str

Summarize a list of imports.

PARAMETERDESCRIPTION
imports

List of import statements

TYPE:List[str]

threshold

Maximum imports before summarizing

TYPE: int, DEFAULT: 5

RETURNSDESCRIPTION
str

Summarized import text

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@classmethod
def summarize_imports(cls, imports: List[str], threshold: int = 5) -> str:
    """Summarize a list of imports.

    Lists with at most ``threshold`` entries are returned verbatim, one
    import per line. Longer lists are bucketed into stdlib / external /
    local groups. Each non-empty group contributes a
    ``# N <group> imports`` header line, followed by the import
    statements themselves only when the group has at most two entries.

    Args:
        imports: List of import statements
        threshold: Maximum imports before summarizing

    Returns:
        Summarized import text ("" when ``imports`` is empty)
    """
    if not imports:
        return ""

    if len(imports) <= threshold:
        return "\n".join(imports)

    # Deliberately small allow-list: this is a heuristic, not a full
    # inventory of the standard library.
    stdlib_roots = {"os", "sys", "json", "math", "datetime", "collections"}

    groups = {"stdlib": [], "external": [], "local": []}

    for imp in imports:
        # Relative imports are always local.
        if imp.startswith(".") or imp.startswith("from ."):
            groups["local"].append(imp)
            continue

        # Classify on the top-level module name, not on a substring of
        # the whole statement: a substring test would file
        # "import orjson" or "from lib import Position" under stdlib
        # because they merely contain "json" / "os".
        tokens = imp.split()
        if len(tokens) > 1 and tokens[0] in ("import", "from"):
            module = tokens[1]
        else:
            # Bare module name (or an unrecognised statement shape).
            module = tokens[0] if tokens else ""
        root = module.split(".")[0].rstrip(",;")

        bucket = "stdlib" if root in stdlib_roots else "external"
        groups[bucket].append(imp)

    summary_parts = []
    for label in ("stdlib", "external", "local"):
        members = groups[label]
        if not members:
            continue
        summary_parts.append(f"# {len(members)} {label} imports")
        # Small groups are cheap enough to show in full.
        if len(members) <= 2:
            summary_parts.extend(members)

    return "\n".join(summary_parts)

ASTParser

Unified AST parsing for code structure extraction.

Functions
extract_python_structure (staticmethod)
Python
extract_python_structure(code: str) -> Dict[str, List[Dict[str, str]]]

Extract functions, classes, and docstrings from Python code.

PARAMETERDESCRIPTION
code

Python source code

TYPE:str

RETURNSDESCRIPTION
Dict[str, List[Dict[str, str]]]

Dictionary with 'functions', 'classes', and 'docstrings' lists

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@staticmethod
def extract_python_structure(code: str) -> Dict[str, List[Dict[str, str]]]:
    """Extract functions, classes, and docstrings from Python code.

    The source is parsed with ``ast`` and every node in the tree is
    visited, so "functions" also contains methods and nested functions,
    not only top-level ones. Both ``def`` and ``async def`` definitions
    are reported.

    Args:
        code: Python source code

    Returns:
        Dictionary with three lists:
            - 'functions': dicts with 'name', 'signature', 'docstring'
              and 'decorators' (only decorators that are plain names)
            - 'classes': dicts with 'name', 'bases', 'docstring' and
              'methods' (each with 'name', 'signature', 'docstring')
            - 'docstrings': the module docstring, when present
        All three lists are empty when the code cannot be parsed.
    """
    structure = {"functions": [], "classes": [], "docstrings": []}

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # Unparseable input (ValueError covers e.g. embedded null bytes):
        # report an empty structure rather than failing the caller.
        return structure

    # ``async def`` is a distinct node type with the same fields as
    # ``def``; without it coroutines would be silently dropped.
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    for node in ast.walk(tree):
        if isinstance(node, function_nodes):
            func_info = {
                "name": node.name,
                "signature": ASTParser._get_function_signature(node),
                "docstring": ast.get_docstring(node) or "",
                # Attribute/call decorators have no ``id`` and are skipped.
                "decorators": [d.id for d in node.decorator_list if hasattr(d, "id")],
            }
            structure["functions"].append(func_info)

        elif isinstance(node, ast.ClassDef):
            class_info = {
                "name": node.name,
                "bases": [ASTParser._get_name(base) for base in node.bases],
                "docstring": ast.get_docstring(node) or "",
                # Only functions defined directly in the class body.
                "methods": [
                    {
                        "name": item.name,
                        "signature": ASTParser._get_function_signature(item),
                        "docstring": ast.get_docstring(item) or "",
                    }
                    for item in node.body
                    if isinstance(item, function_nodes)
                ],
            }
            structure["classes"].append(class_info)

        elif isinstance(node, ast.Module):
            module_docstring = ast.get_docstring(node)
            if module_docstring:
                structure["docstrings"].append(module_docstring)

    return structure
extract_leading_comments (staticmethod)
Python
extract_leading_comments(text: str, max_lines: int = 10) -> str

Extract leading comments from code.

PARAMETERDESCRIPTION
text

Source code text

TYPE:str

max_lines

Maximum lines to check

TYPE: int, DEFAULT: 10

RETURNSDESCRIPTION
str

Extracted comments

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@staticmethod
def extract_leading_comments(text: str, max_lines: int = 10) -> str:
    """Extract leading comments from code.

    Scans at most the first ``max_lines`` lines. ``#`` comment lines are
    collected (stripped); blank lines and ``import``/``from`` lines are
    skipped; the first triple-quoted docstring encountered is collected
    with its original indentation and ends the scan. Any other
    non-blank line is treated as code and also ends the scan.

    Args:
        text: Source code text
        max_lines: Maximum lines to check

    Returns:
        Extracted comments joined with newlines ("" if none were found)
    """
    comments = []
    lines = text.split("\n")[:max_lines]

    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith("#"):
            comments.append(stripped)
        elif stripped.startswith(('"""', "'''")):
            quote = stripped[:3]

            # A docstring that opens and closes on the same line needs
            # at least 6 quote characters; without this check the scan
            # would run on and swallow the code that follows it.
            if len(stripped) >= 6 and stripped.endswith(quote):
                comments.append(line)
                break

            # Multi-line docstring: take lines up to and including the
            # one that ends with the closing quotes (or until the
            # inspected window runs out).
            docstring_lines = [line]
            for rest_line in lines[index + 1 :]:
                docstring_lines.append(rest_line)
                if rest_line.strip().endswith(quote):
                    break

            comments.extend(docstring_lines)
            break
        elif stripped and not stripped.startswith(("import ", "from ")):
            # Hit actual code
            break

    return "\n".join(comments)

TextTruncator

Utilities for truncating text while preserving structure.

Functions
smart_truncate (staticmethod)
Python
smart_truncate(text: str, max_length: int, preserve_structure: bool = True) -> str

Truncate text intelligently while trying to preserve structure.

PARAMETERDESCRIPTION
text

Text to truncate

TYPE:str

max_length

Maximum character length

TYPE:int

preserve_structure

Whether to preserve code structure

TYPE: bool, DEFAULT: True

RETURNSDESCRIPTION
str

Truncated text

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@staticmethod
def smart_truncate(text: str, max_length: int, preserve_structure: bool = True) -> str:
    """Shorten text, cutting on line boundaries where possible.

    Text that already fits is returned unchanged. Without structure
    preservation the text is cut at ``max_length`` characters and "..."
    is appended. With it, whole lines are kept while they fit; the first
    line that does not fit is either partially kept (when more than 20
    characters of budget remain) or replaced by a bare "..." marker.

    Args:
        text: Text to truncate
        max_length: Maximum character length
        preserve_structure: Whether to preserve code structure

    Returns:
        Truncated text
    """
    if len(text) <= max_length:
        return text

    if not preserve_structure:
        return text[:max_length] + "..."

    kept = []
    used = 0

    for row in text.split("\n"):
        cost = len(row) + 1  # the row plus its newline
        if used + cost <= max_length:
            kept.append(row)
            used += cost
            continue

        # This row overflows the budget: keep a prefix of it only when
        # there is enough room left (more than 20 characters) for the
        # fragment to be worthwhile.
        budget = max_length - used
        kept.append(row[: budget - 3] + "..." if budget > 20 else "...")
        break

    return "\n".join(kept)
truncate_middle (staticmethod)
Python
truncate_middle(text: str, max_length: int, context_ratio: float = 0.3) -> str

Truncate middle of text while preserving beginning and end.

PARAMETERDESCRIPTION
text

Text to truncate

TYPE:str

max_length

Maximum character length

TYPE:int

context_ratio

Ratio of text to keep at beginning/end

TYPE: float, DEFAULT: 0.3

RETURNSDESCRIPTION
str

Text with middle truncated

Source code in tenets/core/summarizer/summarizer_utils.py
Python
@staticmethod
def truncate_middle(text: str, max_length: int, context_ratio: float = 0.3) -> str:
    """Truncate middle of text while preserving beginning and end.

    Text that already fits is returned unchanged. Otherwise the same
    number of characters is kept from the start and from the end, with
    a ``...[truncated]...`` marker line between them. The kept size is
    ``max_length * context_ratio``, capped so that both halves plus the
    marker never exceed ``max_length``.

    Args:
        text: Text to truncate
        max_length: Maximum character length
        context_ratio: Ratio of text to keep at beginning/end

    Returns:
        Text with middle truncated. When ``max_length`` is too small to
        hold the marker at all, a plain prefix of at most ``max_length``
        characters is returned instead.
    """
    if len(text) <= max_length:
        return text

    ellipsis = "\n...[truncated]...\n"
    budget = max_length - len(ellipsis)

    # Not even the marker fits: fall back to a hard cut so the result
    # still honours max_length (negative keep sizes would otherwise
    # produce slices longer than the limit).
    if budget < 0:
        return text[: max(max_length, 0)]

    # Same amount from each end, never negative and never more than
    # half of the space left after the marker.
    keep = max(0, min(int(max_length * context_ratio), budget // 2))

    head = text[:keep]
    # Index from the front: ``text[-keep:]`` with keep == 0 would be
    # ``text[-0:]``, i.e. the entire text.
    tail = text[len(text) - keep :]

    return head + ellipsis + tail