python_analyzer
¶
Full name: tenets.core.analysis.implementations.python_analyzer
python_analyzer¶
Python-specific code analyzer using AST.
This module provides comprehensive analysis of Python source code using the Abstract Syntax Tree (AST) module for accurate parsing. It extracts imports, exports, code structure, and calculates various complexity metrics.
Classes¶
PythonAnalyzer¶
Bases: LanguageAnalyzer
Python-specific code analyzer using AST.
Provides deep analysis of Python code including: - Import analysis with tracking of relative imports - Function and class extraction with signatures - Decorator detection - Complexity metrics (cyclomatic, cognitive, Halstead) - Type hint analysis - Docstring extraction - Async function detection
This analyzer uses Python's built-in AST module for accurate parsing, falling back to regex-based extraction when AST parsing fails.
Initialize the Python analyzer with logger.
Source code in tenets/core/analysis/implementations/python_analyzer.py
Functions¶
extract_imports¶
Extract imports from Python code using AST.
Identifies all import statements including: - Standard imports: import os, import sys - From imports: from datetime import datetime - Relative imports: from . import module - Aliased imports: import numpy as np
PARAMETER | DESCRIPTION |
---|---|
content | Python source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[ImportInfo] | List of ImportInfo objects with details about each import |
Source code in tenets/core/analysis/implementations/python_analyzer.py
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Collect every import statement in the given source via AST parsing.

    Recognizes all import forms:
    - Plain imports: ``import os``, ``import sys``
    - From-imports: ``from datetime import datetime``
    - Relative imports: ``from . import module``
    - Aliased imports: ``import numpy as np``

    Falls back to regex-based scanning when the source cannot be parsed.

    Args:
        content: Python source code.
        file_path: Path to the file being analyzed (used in log output).

    Returns:
        List of ImportInfo objects, one per imported name.
    """
    try:
        tree = ast.parse(content)
    except SyntaxError as exc:
        self.logger.debug(f"Syntax error parsing {file_path}: {exc}")
        # Unparseable source: degrade gracefully to the regex scanner.
        return self._extract_imports_regex(content)

    found: List[ImportInfo] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # e.g. ``import module1, module2 as m2`` -- one record per alias.
            found.extend(
                ImportInfo(
                    module=spec.name,
                    alias=spec.asname,
                    line=node.lineno,
                    type="import",
                    is_relative=False,
                    level=0,
                )
                for spec in node.names
            )
        elif isinstance(node, ast.ImportFrom):
            # e.g. ``from module import name1, name2``
            base = node.module or ""
            for spec in node.names:
                # Build the fully-qualified module path for each name;
                # a star import keeps just the source module.
                if spec.name == "*":
                    qualified = base
                elif base:
                    qualified = f"{base}.{spec.name}"
                else:
                    qualified = spec.name
                found.append(
                    ImportInfo(
                        module=qualified,
                        alias=spec.asname,
                        line=node.lineno,
                        type="from",
                        is_relative=node.level > 0,
                        level=node.level,
                        from_module=base,
                    )
                )
    return found
extract_exports¶
Extract exported symbols from Python code.
Python exports are determined by: 1. Explicit `__all__` definition 2. Public symbols (not starting with underscore)
PARAMETER | DESCRIPTION |
---|---|
content | Python source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[Dict[str, Any]] | List of exported symbols with their metadata |
Source code in tenets/core/analysis/implementations/python_analyzer.py
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
"""Extract exported symbols from Python code.
Python exports are determined by:
1. Explicit __all__ definition
2. Public symbols (not starting with underscore)
Args:
content: Python source code
file_path: Path to the file being analyzed
Returns:
List of exported symbols with their metadata
"""
exports = []
try:
tree = ast.parse(content)
# Look for __all__ definition
has_all = False
for node in ast.walk(tree):
if isinstance(node, ast.Assign):
for target in node.targets:
if isinstance(target, ast.Name) and target.id == "__all__":
has_all = True
if isinstance(node.value, ast.List):
for item in node.value.elts:
if isinstance(item, (ast.Constant, ast.Str)):
value = (
item.value if isinstance(item, ast.Constant) else item.s
)
exports.append(
{
"name": value,
"type": "explicit",
"line": node.lineno,
"defined_in_all": True,
}
)
# If no __all__, consider all public symbols
if not has_all:
for node in tree.body:
if isinstance(node, ast.FunctionDef) and not node.name.startswith("_"):
exports.append(
{
"name": node.name,
"type": "function",
"line": node.lineno,
"is_async": isinstance(node, ast.AsyncFunctionDef),
"decorators": [self._get_name(d) for d in node.decorator_list],
}
)
elif isinstance(node, ast.ClassDef) and not node.name.startswith("_"):
exports.append(
{
"name": node.name,
"type": "class",
"line": node.lineno,
"bases": [self._get_name(base) for base in node.bases],
"decorators": [self._get_name(d) for d in node.decorator_list],
}
)
elif isinstance(node, ast.Assign):
for target in node.targets:
if isinstance(target, ast.Name) and not target.id.startswith("_"):
exports.append(
{
"name": target.id,
"type": "variable",
"line": node.lineno,
"is_constant": target.id.isupper(),
}
)
except SyntaxError:
self.logger.debug(f"Syntax error parsing exports from {file_path}")
return exports
extract_structure¶
Extract comprehensive code structure from Python file.
Parses the AST to extract: - Classes with inheritance, methods, and docstrings - Functions with signatures, decorators, and complexity - Global variables and constants - Nested functions and classes
PARAMETER | DESCRIPTION |
---|---|
content | Python source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
CodeStructure | CodeStructure object with complete structural information |
Source code in tenets/core/analysis/implementations/python_analyzer.py
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract comprehensive code structure from Python file.

    Parses the AST to extract:
    - Classes with inheritance, methods, and docstrings
    - Functions with signatures, decorators, and complexity
    - Global variables and constants
    - Nested functions and classes

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with complete structural information
    """
    structure = CodeStructure()
    try:
        tree = ast.parse(content)

        # Extract classes with full information. ast.walk visits the whole
        # tree, so nested classes are captured as well.
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                class_info = ClassInfo(
                    name=node.name,
                    line=node.lineno,
                    end_line=getattr(node, "end_lineno", node.lineno),
                    base_classes=[self._get_name(base) for base in node.bases],
                    decorators=[self._get_name(d) for d in node.decorator_list],
                    methods=[],
                    docstring=ast.get_docstring(node),
                    is_abstract=self._is_abstract_class(node),
                    metaclass=self._get_metaclass(node),
                )

                # Extract methods and annotated class attributes.
                for item in node.body:
                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        method_info = FunctionInfo(
                            name=item.name,
                            line=item.lineno,
                            end_line=getattr(item, "end_lineno", item.lineno),
                            # Record the argument list for methods too, matching
                            # what is done for top-level functions below.
                            args=self._extract_function_args(item),
                            decorators=[self._get_name(d) for d in item.decorator_list],
                            is_async=isinstance(item, ast.AsyncFunctionDef),
                            docstring=ast.get_docstring(item),
                            complexity=self._calculate_function_complexity(item),
                            return_type=self._get_name(item.returns) if item.returns else None,
                            is_constructor=item.name == "__init__",
                            is_abstract=any(
                                self._get_name(d) == "abstractmethod"
                                for d in item.decorator_list
                            ),
                            is_static=self._is_static_method(item),
                            is_class=self._is_class_method(item),
                            is_property=self._is_property(item),
                            # Single-underscore names are private; dunders are not.
                            is_private=item.name.startswith("_")
                            and not item.name.startswith("__"),
                        )
                        class_info.methods.append(method_info)
                    elif isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
                        # Class attributes with type hints
                        class_info.attributes.append(
                            {
                                "name": item.target.id,
                                "line": item.lineno,
                                "type_hint": (
                                    self._get_name(item.annotation) if item.annotation else None
                                ),
                            }
                        )
                structure.classes.append(class_info)

        # Extract top-level functions only (tree.body, not ast.walk), so
        # methods and nested functions are intentionally excluded here.
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                func_info = FunctionInfo(
                    name=node.name,
                    line=node.lineno,
                    end_line=getattr(node, "end_lineno", node.lineno),
                    args=self._extract_function_args(node),
                    decorators=[self._get_name(d) for d in node.decorator_list],
                    is_async=isinstance(node, ast.AsyncFunctionDef),
                    docstring=ast.get_docstring(node),
                    complexity=self._calculate_function_complexity(node),
                    return_type=self._get_name(node.returns) if node.returns else None,
                    is_constructor=False,  # Top-level functions are never constructors
                    is_abstract=any(
                        self._get_name(d) == "abstractmethod" for d in node.decorator_list
                    ),
                    is_static=False,  # Top-level functions are not static methods
                    is_class=False,  # Top-level functions are not class methods
                    is_property=self._is_property(
                        node
                    ),  # Top-level properties possible with decorators
                    is_private=node.name.startswith("_") and not node.name.startswith("__"),
                )
                structure.functions.append(func_info)

        # Extract global variables and constants (ALL_CAPS names).
        for node in tree.body:
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        var_info = {
                            "name": target.id,
                            "line": node.lineno,
                            "type": "constant" if target.id.isupper() else "variable",
                        }
                        structure.variables.append(var_info)
                        if target.id.isupper():
                            structure.constants.append(target.id)
            elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                # Variables with type hints
                var_info = {
                    "name": node.target.id,
                    "line": node.lineno,
                    "type": "typed_variable",
                    "type_hint": self._get_name(node.annotation) if node.annotation else None,
                }
                structure.variables.append(var_info)

        # Extract ``type X = ...`` aliases (PEP 695, Python 3.12+); guarded
        # because ast.TypeAlias only exists on interpreters that support it.
        if hasattr(ast, "TypeAlias"):
            for node in ast.walk(tree):
                if isinstance(node, ast.TypeAlias):
                    structure.type_aliases.append(
                        {
                            "name": node.name.id,
                            "line": node.lineno,
                            "value": self._get_name(node.value),
                        }
                    )
    except SyntaxError as e:
        self.logger.debug(f"Syntax error parsing structure from {file_path}: {e}")
    return structure
calculate_complexity¶
Calculate comprehensive complexity metrics for Python code.
Calculates: - Cyclomatic complexity (McCabe) - Cognitive complexity - Halstead metrics - Maintainability index - Maximum nesting depth - Lines of code metrics
PARAMETER | DESCRIPTION |
---|---|
content | Python source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
ComplexityMetrics | ComplexityMetrics object with all calculated metrics |
Source code in tenets/core/analysis/implementations/python_analyzer.py
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate comprehensive complexity metrics for Python code.

    Calculates:
    - Cyclomatic complexity (McCabe)
    - Cognitive complexity
    - Halstead metrics
    - Maintainability index
    - Maximum nesting depth
    - Lines of code metrics

    Args:
        content: Python source code
        file_path: Path to the file being analyzed

    Returns:
        ComplexityMetrics object with all calculated metrics
    """
    metrics = ComplexityMetrics()
    try:
        tree = ast.parse(content)

        # Calculate cyclomatic complexity (McCabe)
        cyclomatic = self._calculate_cyclomatic_complexity(tree)
        metrics.cyclomatic = cyclomatic

        # Calculate cognitive complexity
        cognitive = self._calculate_cognitive_complexity(tree)
        metrics.cognitive = cognitive

        # Calculate Halstead metrics
        halstead = self._calculate_halstead_metrics(tree)
        metrics.halstead = halstead

        # Calculate nesting metrics
        metrics.max_depth = self._calculate_max_depth(tree)

        # Count code elements. Async functions must be matched explicitly:
        # ast.AsyncFunctionDef is not a subclass of ast.FunctionDef, so
        # checking FunctionDef alone would undercount.
        metrics.line_count = content.count("\n") + 1
        metrics.function_count = len(
            [
                n
                for n in ast.walk(tree)
                if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]
        )
        metrics.class_count = len([n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)])
        metrics.method_count = self._count_methods(tree)

        # Calculate comment ratio
        metrics.comment_lines = self._count_comment_lines(content)
        metrics.comment_ratio = (
            metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
        )

        # Calculate code lines (non-empty, non-comment)
        metrics.code_lines = self._count_code_lines(content)

        # Calculate maintainability index
        # MI = 171 - 5.2 * ln(HV) - 0.23 * CC - 16.2 * ln(LOC) + 50 * sin(sqrt(2.4 * CM))
        # Only computable when Halstead volume and code-line count are positive
        # (math.log requires a strictly positive argument).
        if halstead and halstead.get("volume", 0) > 0 and metrics.code_lines > 0:
            halstead_volume = halstead["volume"]
            mi = (
                171
                - 5.2 * math.log(halstead_volume)
                - 0.23 * cyclomatic
                - 16.2 * math.log(metrics.code_lines)
                + 50 * math.sin(math.sqrt(2.4 * metrics.comment_ratio))
            )
            # Clamp to the conventional 0-100 range.
            metrics.maintainability_index = max(0, min(100, mi))
    except SyntaxError as e:
        self.logger.debug(f"Syntax error calculating complexity for {file_path}: {e}")
        # Parsing failed: return the basic text-derived metrics only.
        metrics.line_count = content.count("\n") + 1
        metrics.code_lines = self._count_code_lines(content)
        metrics.comment_lines = self._count_comment_lines(content)
    return metrics