
python_analyzer

Full name: tenets.core.analysis.implementations.python_analyzer

Python-specific code analyzer using AST.

This module provides comprehensive analysis of Python source code using the Abstract Syntax Tree (AST) module for accurate parsing. It extracts imports, exports, code structure, and calculates various complexity metrics.

Classes

PythonAnalyzer

Python
PythonAnalyzer()

Bases: LanguageAnalyzer

Python-specific code analyzer using AST.

Provides deep analysis of Python code including:

- Import analysis with tracking of relative imports
- Function and class extraction with signatures
- Decorator detection
- Complexity metrics (cyclomatic, cognitive, Halstead)
- Type hint analysis
- Docstring extraction
- Async function detection

This analyzer uses Python's built-in AST module for accurate parsing, falling back to regex-based extraction when AST parsing fails.

Initialize the Python analyzer with logger.

Source code in tenets/core/analysis/implementations/python_analyzer.py
Python
def __init__(self):
    """Initialize the Python analyzer with logger."""
    self.logger = get_logger(__name__)
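
A minimal usage sketch, assuming PythonAnalyzer is importable from the module named at the top of this page; the file path is illustrative:

Python
from pathlib import Path

from tenets.core.analysis.implementations.python_analyzer import PythonAnalyzer

analyzer = PythonAnalyzer()
path = Path("example.py")  # illustrative path; any readable Python file works
source = path.read_text()

imports = analyzer.extract_imports(source, path)        # List[ImportInfo]
exports = analyzer.extract_exports(source, path)        # List[Dict[str, Any]]
structure = analyzer.extract_structure(source, path)    # CodeStructure
metrics = analyzer.calculate_complexity(source, path)   # ComplexityMetrics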
Functions
extract_imports
Python
extract_imports(content: str, file_path: Path) -> List[ImportInfo]

Extract imports from Python code using AST.

Identifies all import statements including:

- Standard imports: import os, import sys
- From imports: from datetime import datetime
- Relative imports: from . import module
- Aliased imports: import numpy as np

Parameters:

    content (str): Python source code
    file_path (Path): Path to the file being analyzed

Returns:

    List[ImportInfo]: List of ImportInfo objects with details about each import

Source code in tenets/core/analysis/implementations/python_analyzer.py
Python
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Extract imports from Python code using AST.

    Identifies all import statements including:
    - Standard imports: import os, import sys
    - From imports: from datetime import datetime
    - Relative imports: from . import module
    - Aliased imports: import numpy as np

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        List of ImportInfo objects with details about each import
    """
    imports = []

    try:
        tree = ast.parse(content)

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                # Handle: import module1, module2 as m2
                for alias in node.names:
                    imports.append(
                        ImportInfo(
                            module=alias.name,
                            alias=alias.asname,
                            line=node.lineno,
                            type="import",
                            is_relative=False,
                            level=0,
                        )
                    )

            elif isinstance(node, ast.ImportFrom):
                # Handle: from module import name1, name2
                module = node.module or ""
                for alias in node.names:
                    imported_name = alias.name

                    # Determine full module path
                    if imported_name == "*":
                        full_module = module
                    else:
                        full_module = f"{module}.{imported_name}" if module else imported_name

                    imports.append(
                        ImportInfo(
                            module=full_module,
                            alias=alias.asname,
                            line=node.lineno,
                            type="from",
                            is_relative=node.level > 0,
                            level=node.level,
                            from_module=module,
                        )
                    )

    except SyntaxError as e:
        self.logger.debug(f"Syntax error parsing {file_path}: {e}")
        # Fallback to regex-based extraction
        imports = self._extract_imports_regex(content)

    return imports
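A short sketch of the four import forms listed above and the fields recorded for each; the expected values in the comments follow the construction logic in the source, and the file path is illustrative:

Python
from pathlib import Path

from tenets.core.analysis.implementations.python_analyzer import PythonAnalyzer

source = (
    "import os\n"                      # standard import
    "import numpy as np\n"             # aliased import
    "from datetime import datetime\n"  # from import
    "from . import sibling\n"          # relative import (level 1)
)

analyzer = PythonAnalyzer()
for imp in analyzer.extract_imports(source, Path("demo.py")):
    # e.g. os/import, numpy/import with alias "np", datetime.datetime/from,
    # and sibling/from with is_relative=True and level=1
    print(imp.module, imp.type, imp.alias, imp.is_relative, imp.level)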
extract_exports
Python
extract_exports(content: str, file_path: Path) -> List[Dict[str, Any]]

Extract exported symbols from Python code.

Python exports are determined by:

1. An explicit __all__ definition
2. Public symbols (names not starting with an underscore)

Parameters:

    content (str): Python source code
    file_path (Path): Path to the file being analyzed

Returns:

    List[Dict[str, Any]]: List of exported symbols with their metadata

Source code in tenets/core/analysis/implementations/python_analyzer.py
Python
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
    """Extract exported symbols from Python code.

    Python exports are determined by:
    1. Explicit __all__ definition
    2. Public symbols (not starting with underscore)

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        List of exported symbols with their metadata
    """
    exports = []

    try:
        tree = ast.parse(content)

        # Look for __all__ definition
        has_all = False
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and target.id == "__all__":
                        has_all = True
                        if isinstance(node.value, ast.List):
                            for item in node.value.elts:
                                if isinstance(item, (ast.Constant, ast.Str)):
                                    value = (
                                        item.value if isinstance(item, ast.Constant) else item.s
                                    )
                                    exports.append(
                                        {
                                            "name": value,
                                            "type": "explicit",
                                            "line": node.lineno,
                                            "defined_in_all": True,
                                        }
                                    )

        # If no __all__, consider all public symbols
        if not has_all:
            for node in tree.body:
                if isinstance(node, ast.FunctionDef) and not node.name.startswith("_"):
                    exports.append(
                        {
                            "name": node.name,
                            "type": "function",
                            "line": node.lineno,
                            "is_async": isinstance(node, ast.AsyncFunctionDef),
                            "decorators": [self._get_name(d) for d in node.decorator_list],
                        }
                    )
                elif isinstance(node, ast.ClassDef) and not node.name.startswith("_"):
                    exports.append(
                        {
                            "name": node.name,
                            "type": "class",
                            "line": node.lineno,
                            "bases": [self._get_name(base) for base in node.bases],
                            "decorators": [self._get_name(d) for d in node.decorator_list],
                        }
                    )
                elif isinstance(node, ast.Assign):
                    for target in node.targets:
                        if isinstance(target, ast.Name) and not target.id.startswith("_"):
                            exports.append(
                                {
                                    "name": target.id,
                                    "type": "variable",
                                    "line": node.lineno,
                                    "is_constant": target.id.isupper(),
                                }
                            )

    except SyntaxError:
        self.logger.debug(f"Syntax error parsing exports from {file_path}")

    return exports
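A sketch contrasting the two export paths described above; the module sources are inlined as strings purely for illustration:

Python
from pathlib import Path

from tenets.core.analysis.implementations.python_analyzer import PythonAnalyzer

analyzer = PythonAnalyzer()

with_all = '__all__ = ["public_func"]\n\ndef public_func(): ...\ndef _helper(): ...\n'
without_all = "def public_func(): ...\nclass Widget: ...\nMAX_SIZE = 10\n_cache = {}\n"

# __all__ present: only the listed names are reported, tagged "explicit".
print([e["name"] for e in analyzer.extract_exports(with_all, Path("a.py"))])

# No __all__: every top-level public function, class, and variable is reported;
# leading-underscore names such as _helper and _cache are skipped.
print([(e["name"], e["type"]) for e in analyzer.extract_exports(without_all, Path("b.py"))])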
extract_structure
Python
extract_structure(content: str, file_path: Path) -> CodeStructure

Extract comprehensive code structure from Python file.

Parses the AST to extract:

- Classes with inheritance, methods, and docstrings
- Functions with signatures, decorators, and complexity
- Global variables and constants
- Nested functions and classes

Parameters:

    content (str): Python source code
    file_path (Path): Path to the file being analyzed

Returns:

    CodeStructure: CodeStructure object with complete structural information

Source code in tenets/core/analysis/implementations/python_analyzer.py
Python
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract comprehensive code structure from Python file.

    Parses the AST to extract:
    - Classes with inheritance, methods, and docstrings
    - Functions with signatures, decorators, and complexity
    - Global variables and constants
    - Nested functions and classes

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with complete structural information
    """
    structure = CodeStructure()

    try:
        tree = ast.parse(content)

        # Extract classes with full information
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                class_info = ClassInfo(
                    name=node.name,
                    line=node.lineno,
                    end_line=getattr(node, "end_lineno", node.lineno),
                    base_classes=[self._get_name(base) for base in node.bases],
                    decorators=[self._get_name(d) for d in node.decorator_list],
                    methods=[],
                    docstring=ast.get_docstring(node),
                    is_abstract=self._is_abstract_class(node),
                    metaclass=self._get_metaclass(node),
                )

                # Extract methods and attributes
                for item in node.body:
                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        method_info = FunctionInfo(
                            name=item.name,
                            line=item.lineno,
                            end_line=getattr(item, "end_lineno", item.lineno),
                            decorators=[self._get_name(d) for d in item.decorator_list],
                            is_async=isinstance(item, ast.AsyncFunctionDef),
                            docstring=ast.get_docstring(item),
                            complexity=self._calculate_function_complexity(item),
                            return_type=self._get_name(item.returns) if item.returns else None,
                            is_constructor=item.name == "__init__",
                            is_abstract=any(
                                self._get_name(d) == "abstractmethod"
                                for d in item.decorator_list
                            ),
                            is_static=self._is_static_method(item),
                            is_class=self._is_class_method(item),
                            is_property=self._is_property(item),
                            is_private=item.name.startswith("_")
                            and not item.name.startswith("__"),
                        )
                        class_info.methods.append(method_info)
                    elif isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
                        # Class attributes with type hints
                        class_info.attributes.append(
                            {
                                "name": item.target.id,
                                "line": item.lineno,
                                "type_hint": (
                                    self._get_name(item.annotation) if item.annotation else None
                                ),
                            }
                        )

                structure.classes.append(class_info)

        # Extract top-level functions
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_info = FunctionInfo(
                    name=node.name,
                    line=node.lineno,
                    end_line=getattr(node, "end_lineno", node.lineno),
                    args=self._extract_function_args(node),
                    decorators=[self._get_name(d) for d in node.decorator_list],
                    is_async=isinstance(node, ast.AsyncFunctionDef),
                    docstring=ast.get_docstring(node),
                    complexity=self._calculate_function_complexity(node),
                    return_type=self._get_name(node.returns) if node.returns else None,
                    is_constructor=False,  # Top-level functions are never constructors
                    is_abstract=any(
                        self._get_name(d) == "abstractmethod" for d in node.decorator_list
                    ),
                    is_static=False,  # Top-level functions are not static methods
                    is_class=False,  # Top-level functions are not class methods
                    is_property=self._is_property(
                        node
                    ),  # Top-level properties possible with decorators
                    is_private=node.name.startswith("_") and not node.name.startswith("__"),
                )
                structure.functions.append(func_info)

        # Extract global variables and constants
        for node in tree.body:
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        var_info = {
                            "name": target.id,
                            "line": node.lineno,
                            "type": "constant" if target.id.isupper() else "variable",
                        }
                        structure.variables.append(var_info)

                        if target.id.isupper():
                            structure.constants.append(target.id)

            elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                # Variables with type hints
                var_info = {
                    "name": node.target.id,
                    "line": node.lineno,
                    "type": "typed_variable",
                    "type_hint": self._get_name(node.annotation) if node.annotation else None,
                }
                structure.variables.append(var_info)

        # Extract type aliases (Python 3.10+)
        if hasattr(ast, "TypeAlias"):
            for node in ast.walk(tree):
                if isinstance(node, ast.TypeAlias):
                    structure.type_aliases.append(
                        {
                            "name": node.name.id,
                            "line": node.lineno,
                            "value": self._get_name(node.value),
                        }
                    )

    except SyntaxError as e:
        self.logger.debug(f"Syntax error parsing structure from {file_path}: {e}")

    return structure
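A brief sketch of walking the returned CodeStructure; the attribute and field names follow the assignments in the source above, and the inlined module source is illustrative:

Python
from pathlib import Path

from tenets.core.analysis.implementations.python_analyzer import PythonAnalyzer

source = (
    "MAX_RETRIES = 3\n"
    "\n"
    "class Greeter:\n"
    '    """Says hello."""\n'
    "\n"
    "    def greet(self, name: str) -> str:\n"
    "        return name\n"
    "\n"
    "async def fetch() -> None:\n"
    "    ...\n"
)

analyzer = PythonAnalyzer()
structure = analyzer.extract_structure(source, Path("demo.py"))

for cls in structure.classes:
    print(cls.name, cls.docstring, [m.name for m in cls.methods])
for fn in structure.functions:
    print(fn.name, fn.is_async, fn.return_type)
print(structure.constants)  # expected to contain "MAX_RETRIES"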
calculate_complexity
Python
calculate_complexity(content: str, file_path: Path) -> ComplexityMetrics

Calculate comprehensive complexity metrics for Python code.

Calculates:

- Cyclomatic complexity (McCabe)
- Cognitive complexity
- Halstead metrics
- Maintainability index
- Maximum nesting depth
- Lines of code metrics

Parameters:

    content (str): Python source code
    file_path (Path): Path to the file being analyzed

Returns:

    ComplexityMetrics: ComplexityMetrics object with all calculated metrics

Source code in tenets/core/analysis/implementations/python_analyzer.py
Python
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate comprehensive complexity metrics for Python code.

    Calculates:
    - Cyclomatic complexity (McCabe)
    - Cognitive complexity
    - Halstead metrics
    - Maintainability index
    - Maximum nesting depth
    - Lines of code metrics

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        ComplexityMetrics object with all calculated metrics
    """
    metrics = ComplexityMetrics()

    try:
        tree = ast.parse(content)

        # Calculate cyclomatic complexity (McCabe)
        cyclomatic = self._calculate_cyclomatic_complexity(tree)
        metrics.cyclomatic = cyclomatic

        # Calculate cognitive complexity
        cognitive = self._calculate_cognitive_complexity(tree)
        metrics.cognitive = cognitive

        # Calculate Halstead metrics
        halstead = self._calculate_halstead_metrics(tree)
        metrics.halstead = halstead

        # Calculate nesting metrics
        metrics.max_depth = self._calculate_max_depth(tree)

        # Count code elements
        metrics.line_count = content.count("\n") + 1
        metrics.function_count = len(
            [n for n in ast.walk(tree) if isinstance(n, ast.FunctionDef)]
        )
        metrics.class_count = len([n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)])
        metrics.method_count = self._count_methods(tree)

        # Calculate comment ratio
        metrics.comment_lines = self._count_comment_lines(content)
        metrics.comment_ratio = (
            metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
        )

        # Calculate code lines (non-empty, non-comment)
        metrics.code_lines = self._count_code_lines(content)

        # Calculate maintainability index
        # MI = 171 - 5.2 * ln(HV) - 0.23 * CC - 16.2 * ln(LOC) + 50 * sin(sqrt(2.4 * CM))
        if halstead and halstead.get("volume", 0) > 0 and metrics.code_lines > 0:
            halstead_volume = halstead["volume"]
            mi = (
                171
                - 5.2 * math.log(halstead_volume)
                - 0.23 * cyclomatic
                - 16.2 * math.log(metrics.code_lines)
                + 50 * math.sin(math.sqrt(2.4 * metrics.comment_ratio))
            )
            metrics.maintainability_index = max(0, min(100, mi))

    except SyntaxError as e:
        self.logger.debug(f"Syntax error calculating complexity for {file_path}: {e}")
        # Return basic metrics from text analysis
        metrics.line_count = content.count("\n") + 1
        metrics.code_lines = self._count_code_lines(content)
        metrics.comment_lines = self._count_comment_lines(content)

    return metrics
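A short sketch of reading the resulting ComplexityMetrics; the field names follow the assignments in the source, while exact values depend on the helper implementations not shown here:

Python
from pathlib import Path

from tenets.core.analysis.implementations.python_analyzer import PythonAnalyzer

source = (
    "def classify(n):\n"
    "    # two decision points in a single small function\n"
    "    if n < 0:\n"
    "        return 'negative'\n"
    "    elif n == 0:\n"
    "        return 'zero'\n"
    "    return 'positive'\n"
)

analyzer = PythonAnalyzer()
metrics = analyzer.calculate_complexity(source, Path("demo.py"))

print(metrics.cyclomatic, metrics.cognitive, metrics.max_depth)
print(metrics.function_count, metrics.code_lines, metrics.comment_lines)
print(metrics.comment_ratio, metrics.maintainability_index)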
