java_analyzer
¶
Full name: tenets.core.analysis.implementations.java_analyzer
java_analyzer¶
Java code analyzer.
This module provides comprehensive analysis for Java source files, including support for modern Java features, annotations, and frameworks.
Classes¶
JavaAnalyzer¶
Bases: LanguageAnalyzer
Java code analyzer.
Provides comprehensive analysis for Java files including: - Import analysis with static and wildcard imports - Package structure analysis - Class, interface, enum, and record extraction - Annotation processing - Generic type analysis - Method and field extraction with modifiers - Inner and anonymous class detection - Lambda expression support - Framework detection (Spring, JUnit, etc.)
Supports modern Java features including records, sealed classes, pattern matching, and text blocks.
Initialize the Java analyzer with logger.
Source code in tenets/core/analysis/implementations/java_analyzer.py
Functions¶
extract_imports¶
Extract imports from Java code.
Handles: - Standard imports: import java.util.List; - Static imports: import static java.lang.Math.PI; - Wildcard imports: import java.util.*; - Static wildcard: import static org.junit.Assert.*;
PARAMETER | DESCRIPTION |
---|---|
content | Java source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[ImportInfo] | List of ImportInfo objects with import details |
Source code in tenets/core/analysis/implementations/java_analyzer.py
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Extract imports from Java code.

    Handles:
    - Standard imports: import java.util.List;
    - Static imports: import static java.lang.Math.PI;
    - Wildcard imports: import java.util.*;
    - Static wildcard: import static org.junit.Assert.*;

    Args:
        content: Java source code
        file_path: Path to the file being analyzed

    Returns:
        List of ImportInfo objects with import details
    """
    # Matches "import [static] some.pkg.Name;" or "import [static] some.pkg.*;"
    import_re = re.compile(r"^\s*import\s+(?:(static)\s+)?([a-zA-Z0-9_.]+(?:\.\*)?)\s*;")
    # First type declaration marks the end of the import section.
    type_decl_re = re.compile(r"^\s*(?:public\s+)?(?:class|interface|enum|record)\s+")

    collected: List[ImportInfo] = []
    for lineno, raw_line in enumerate(content.split("\n"), start=1):
        stripped = raw_line.strip()
        # Skip line comments and block-comment openers.
        if stripped.startswith(("//", "/*")):
            continue
        # No imports can appear once a type declaration starts.
        if type_decl_re.match(raw_line):
            break

        matched = import_re.match(raw_line)
        if not matched:
            continue

        static_kw, module = matched.group(1), matched.group(2)
        wildcard = module.endswith(".*")
        # Package = everything before the simple name; for wildcards the
        # module minus the trailing ".*" is already the package.
        if "." in module and not wildcard:
            package = module.rsplit(".", 1)[0]
        else:
            package = module.rstrip(".*")

        collected.append(
            ImportInfo(
                module=module,
                line=lineno,
                type="static" if static_kw == "static" else "import",
                is_wildcard=wildcard,
                is_relative=False,  # Java has no relative imports
                category=self._categorize_java_import(module),
                package=package,
            )
        )

    return collected
extract_exports¶
Extract public members from Java code.
In Java, public members are exported from a class/package. This includes public classes, interfaces, enums, methods, and fields.
PARAMETER | DESCRIPTION |
---|---|
content | Java source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[Dict[str, Any]] | List of exported (public) symbols with metadata |
Source code in tenets/core/analysis/implementations/java_analyzer.py
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
    """Extract public members from Java code.

    In Java, public members are exported from a class/package.
    This includes public classes, interfaces, enums, methods, and fields.

    Args:
        content: Java source code
        file_path: Path to the file being analyzed

    Returns:
        List of exported (public) symbols with metadata
    """
    exports: List[Dict[str, Any]] = []

    # Extract package name
    package_match = re.search(r"^\s*package\s+([\w.]+)\s*;", content, re.MULTILINE)
    package_name = package_match.group(1) if package_match else ""

    def _line_of(pos: int) -> int:
        # 1-based line for a character offset. Line numbers are computed from
        # the start of the *name* group: the patterns below consume a leading
        # "\n", so match.start() sits on the previous line and using it would
        # report every declaration one line early.
        return content[:pos].count("\n") + 1

    # Public classes. Allow a full modifier run so nested "public static class"
    # and modern "public sealed class" declarations are also detected.
    class_pattern = (
        r"(?:^|\n)\s*public\s+"
        r"((?:(?:abstract|final|static|sealed|non-sealed|strictfp)\s+)*)"
        r"class\s+(\w+)(?:<[^>]+>)?"
    )
    for match in re.finditer(class_pattern, content):
        modifiers = match.group(1).split()
        exports.append(
            {
                "name": match.group(2),
                "type": "class",
                "line": _line_of(match.start(2)),
                "package": package_name,
                "modifiers": modifiers,
                "is_abstract": "abstract" in modifiers,
                "is_final": "final" in modifiers,
            }
        )

    # Public interfaces (optionally annotated with extra modifiers)
    interface_pattern = (
        r"(?:^|\n)\s*public\s+"
        r"(?:(?:abstract|static|sealed|non-sealed|strictfp)\s+)*"
        r"interface\s+(\w+)(?:<[^>]+>)?"
    )
    for match in re.finditer(interface_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "interface",
                "line": _line_of(match.start(1)),
                "package": package_name,
            }
        )

    # Public enums
    enum_pattern = r"(?:^|\n)\s*public\s+(?:(?:static|strictfp)\s+)*enum\s+(\w+)"
    for match in re.finditer(enum_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "enum",
                "line": _line_of(match.start(1)),
                "package": package_name,
            }
        )

    # Public records (Java 14+)
    record_pattern = (
        r"(?:^|\n)\s*public\s+(?:(?:static|final|strictfp)\s+)*record\s+(\w+)\s*\([^)]*\)"
    )
    for match in re.finditer(record_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "record",
                "line": _line_of(match.start(1)),
                "package": package_name,
            }
        )

    # Public methods: modifiers, optional generic type params, return type, name(args)
    method_pattern = r"(?:^|\n)\s*public\s+(?:(?:static|final|abstract|synchronized|native)\s+)*(?:<[^>]+>\s+)?(?:[\w<>\[\]]+)\s+(\w+)\s*\([^)]*\)"
    # Keywords that can spuriously match the "name(" portion of the pattern.
    _NON_METHOD_KEYWORDS = frozenset(
        {"if", "for", "while", "switch", "catch", "new", "return", "throw"}
    )
    for match in re.finditer(method_pattern, content):
        method_name = match.group(1)
        if method_name in _NON_METHOD_KEYWORDS:
            continue
        line_content = content[match.start() : match.end()]
        exports.append(
            {
                "name": method_name,
                "type": "method",
                "line": _line_of(match.start(1)),
                "is_static": "static" in line_content,
                "is_final": "final" in line_content,
                "is_abstract": "abstract" in line_content,
                "is_synchronized": "synchronized" in line_content,
            }
        )

    # Public fields: modifiers, type, name followed by ";" or "="
    field_pattern = r"(?:^|\n)\s*public\s+(?:(?:static|final|volatile|transient)\s+)*(?:[\w<>\[\]]+)\s+(\w+)\s*[;=]"
    for match in re.finditer(field_pattern, content):
        field_name = match.group(1)
        line_content = content[match.start() : match.end()]
        exports.append(
            {
                "name": field_name,
                "type": "field",
                "line": _line_of(match.start(1)),
                "is_static": "static" in line_content,
                "is_final": "final" in line_content,
                # Java convention: static final fields are constants.
                "is_constant": "static" in line_content and "final" in line_content,
            }
        )

    return exports
extract_structure¶
Extract code structure from Java file.
Extracts: - Package declaration - Classes with inheritance and interfaces - Interfaces with extension - Enums with values - Records (Java 14+) - Methods with full signatures - Fields with types - Annotations - Inner classes - Lambda expressions
PARAMETER | DESCRIPTION |
---|---|
content | Java source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
CodeStructure | CodeStructure object with extracted elements |
Source code in tenets/core/analysis/implementations/java_analyzer.py
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract code structure from Java file.

    Extracts:
    - Package declaration
    - Classes with inheritance and interfaces
    - Interfaces with extension
    - Enums with values
    - Records (Java 14+)
    - Methods with full signatures
    - Fields with types
    - Annotations
    - Inner classes
    - Lambda expressions

    Args:
        content: Java source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with extracted elements
    """
    structure = CodeStructure()

    def _line_at(pos: int) -> int:
        # 1-based line for a character offset. Lines are computed from the
        # start of the declaration's *name* group instead of match.start():
        # the patterns below consume a leading "\n", so match.start() points
        # at the previous line and would be off by one.
        return content[:pos].count("\n") + 1

    # Extract package declaration
    package_match = re.search(r"^\s*package\s+([\w.]+)\s*;", content, re.MULTILINE)
    if package_match:
        structure.package = package_match.group(1)

    # Extract classes: visibility, abstract/final, sealed, name, generics,
    # extends, implements
    class_pattern = r"(?:^|\n)\s*(?:(public|private|protected)\s+)?(?:(abstract|final)\s+)?(?:(sealed)\s+)?class\s+(\w+)(?:<([^>]+)>)?(?:\s+extends\s+([\w<>]+))?(?:\s+implements\s+([\w,\s<>]+))?"
    for match in re.finditer(class_pattern, content):
        # No explicit visibility keyword means package-private in Java.
        visibility = match.group(1) or "package-private"
        modifiers = [mod for mod in (match.group(2), match.group(3)) if mod]
        class_name = match.group(4)
        generics = match.group(5)
        extends = match.group(6)
        implements = match.group(7)

        class_info = ClassInfo(
            name=class_name,
            line=_line_at(match.start(4)),
            visibility=visibility,
            modifiers=modifiers,
            generics=generics,
            bases=[extends] if extends else [],
            interfaces=self._parse_implements_list(implements) if implements else [],
            methods=[],
            fields=[],
            inner_classes=[],
        )

        # Find class body and extract members
        class_body = self._extract_class_body(content, match.end())
        if class_body:
            class_info.methods = self._extract_methods(class_body)
            class_info.fields = self._extract_fields(class_body)
            class_info.inner_classes = self._extract_inner_classes(class_body)

        structure.classes.append(class_info)

    # Extract interfaces
    interface_pattern = r"(?:^|\n)\s*(?:(public|private|protected)\s+)?(?:(sealed)\s+)?interface\s+(\w+)(?:<([^>]+)>)?(?:\s+extends\s+([\w,\s<>]+))?"
    for match in re.finditer(interface_pattern, content):
        visibility = match.group(1) or "package-private"
        is_sealed = match.group(2) == "sealed"
        interface_name = match.group(3)
        generics = match.group(4)
        extends = match.group(5)

        # Extract interface methods
        interface_body = self._extract_class_body(content, match.end())
        methods = self._extract_interface_methods(interface_body) if interface_body else []

        structure.interfaces.append(
            {
                "name": interface_name,
                "line": _line_at(match.start(3)),
                "visibility": visibility,
                "is_sealed": is_sealed,
                "generics": generics,
                "extends": self._parse_implements_list(extends) if extends else [],
                "methods": methods,
                # Heuristic: exactly one declared method. Does not check for
                # @FunctionalInterface or exclude default/static methods.
                "is_functional": len(methods) == 1,
            }
        )

    # Extract enums
    enum_pattern = r"(?:^|\n)\s*(?:(public|private|protected)\s+)?enum\s+(\w+)(?:\s+implements\s+([\w,\s<>]+))?"
    for match in re.finditer(enum_pattern, content):
        visibility = match.group(1) or "package-private"
        enum_name = match.group(2)
        implements = match.group(3)

        # Extract enum values
        enum_body = self._extract_class_body(content, match.end())
        values = self._extract_enum_values(enum_body) if enum_body else []

        structure.enums.append(
            {
                "name": enum_name,
                "line": _line_at(match.start(2)),
                "visibility": visibility,
                "implements": self._parse_implements_list(implements) if implements else [],
                "values": values,
            }
        )

    # Extract records (Java 14+)
    record_pattern = (
        r"(?:^|\n)\s*(?:(public|private|protected)\s+)?record\s+(\w+)\s*\(([^)]*)\)"
    )
    for match in re.finditer(record_pattern, content):
        visibility = match.group(1) or "package-private"
        record_name = match.group(2)
        components = match.group(3)

        structure.records.append(
            {
                "name": record_name,
                "line": _line_at(match.start(2)),
                "visibility": visibility,
                "components": self._parse_record_components(components),
            }
        )

    # Extract annotations used in the file (deduplicated by name)
    annotation_pattern = r"@(\w+)(?:\([^)]*\))?"
    annotations = set()
    for match in re.finditer(annotation_pattern, content):
        annotations.add(match.group(1))
    structure.annotations = list(annotations)

    # Detect frameworks based on annotations and imports
    structure.framework = self._detect_framework(content, structure.annotations)

    # Count lambda expressions, e.g. "(a, b) ->"
    lambda_pattern = r"\([^)]*\)\s*->"
    structure.lambda_count = len(re.findall(lambda_pattern, content))

    # Count anonymous classes, e.g. "new Runnable() {"
    anonymous_pattern = r"new\s+[\w<>]+\s*\([^)]*\)\s*\{"
    structure.anonymous_classes_count = len(re.findall(anonymous_pattern, content))

    return structure
calculate_complexity¶
Calculate complexity metrics for Java code.
Calculates: - Cyclomatic complexity - Cognitive complexity - Class coupling - Inheritance depth indicators - Exception handling complexity
PARAMETER | DESCRIPTION |
---|---|
content | Java source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
ComplexityMetrics | ComplexityMetrics object with calculated metrics |
Source code in tenets/core/analysis/implementations/java_analyzer.py
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate complexity metrics for Java code.

    Calculates:
    - Cyclomatic complexity
    - Cognitive complexity
    - Class coupling
    - Inheritance depth indicators
    - Exception handling complexity

    Args:
        content: Java source code
        file_path: Path to the file being analyzed

    Returns:
        ComplexityMetrics object with calculated metrics
    """
    metrics = ComplexityMetrics()

    # --- Cyclomatic complexity (regex heuristic over the raw source) ---
    complexity = 1
    decision_keywords = [
        r"\bif\b",
        r"\belse\s+if\b",
        r"\belse\b",
        r"\bfor\b",
        r"\bwhile\b",
        r"\bdo\b",
        r"\bswitch\b",
        r"\bcase\b",
        r"\bcatch\b",
        r"\bthrow\b",
        # BUGFIX: the previous r"\b&&\b" required a word boundary on both
        # sides of "&&", which never exists in the idiomatic "a && b"
        # (space/& is not a boundary) — match the operator literally.
        r"&&",
        r"\|\|",
        r"\?",  # ternary operator; also counts generic wildcards like List<?> (heuristic)
    ]
    for keyword in decision_keywords:
        complexity += len(re.findall(keyword, content))

    # Add complexity for enhanced for loops: for (T item : items)
    complexity += len(re.findall(r"for\s*\([^:]+:[^)]+\)", content))
    metrics.cyclomatic = complexity

    # --- Cognitive complexity: control structures weighted by brace nesting ---
    cognitive = 0
    nesting_level = 0
    max_nesting = 0

    # Loop-invariant pattern/weight table, hoisted out of the per-line loop.
    control_patterns = [
        (r"\bif\b", 1),
        (r"\belse\s+if\b", 1),
        (r"\belse\b", 0),
        (r"\bfor\b", 1),
        (r"\bwhile\b", 1),
        (r"\bdo\b", 1),
        (r"\bswitch\b", 1),
        (r"\btry\b", 1),
        (r"\bcatch\b", 1),
    ]

    lines = content.split("\n")
    for line in lines:
        # Skip line comments and block-comment openers
        if line.strip().startswith("//") or line.strip().startswith("/*"):
            continue

        # Track nesting via brace balance on this line
        opening_braces = line.count("{")
        closing_braces = line.count("}")
        nesting_level += opening_braces - closing_braces
        max_nesting = max(max_nesting, nesting_level)

        # Control structures incur a penalty that grows with nesting depth
        for pattern, weight in control_patterns:
            if re.search(pattern, line):
                cognitive += weight * (1 + max(0, nesting_level - 1))

        # Exception handling adds complexity
        if re.search(r"\bthrow\s+new\b", line):
            cognitive += 2

        # Nested (inner) classes add complexity
        if re.search(r"\bclass\s+\w+\s*\{", line) and nesting_level > 1:
            cognitive += 3

    metrics.cognitive = cognitive
    metrics.max_depth = max_nesting

    # --- Size and comment metrics ---
    metrics.line_count = len(lines)
    metrics.code_lines = self._count_code_lines(content)
    metrics.comment_lines = self._count_comment_lines(content)
    metrics.comment_ratio = (
        metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
    )

    # Count classes and interfaces
    metrics.class_count = len(re.findall(r"\bclass\s+\w+", content))
    metrics.interface_count = len(re.findall(r"\binterface\s+\w+", content))
    metrics.enum_count = len(re.findall(r"\benum\s+\w+", content))
    metrics.record_count = len(re.findall(r"\brecord\s+\w+", content))

    # Count methods (declarations with a modifier, a return type and a body)
    method_pattern = r"(?:public|private|protected|static|final|abstract|synchronized|native)\s+[\w<>\[\]]+\s+\w+\s*\([^)]*\)\s*(?:throws\s+[\w,\s]+)?\s*\{"
    metrics.method_count = len(re.findall(method_pattern, content))

    # Exception handling metrics
    metrics.try_blocks = len(re.findall(r"\btry\s*\{", content))
    metrics.catch_blocks = len(re.findall(r"\bcatch\s*\([^)]+\)", content))
    metrics.finally_blocks = len(re.findall(r"\bfinally\s*\{", content))
    # Count both method 'throws' declarations and explicit throw statements
    metrics.throws_declarations = len(re.findall(r"\bthrows\s+[\w.,\s]+", content))
    metrics.throws_declarations += len(re.findall(r"\bthrow\s+new\b", content))

    # Annotation metrics
    metrics.annotation_count = len(re.findall(r"@\w+", content))

    # Inheritance metrics
    metrics.extends_count = len(re.findall(r"\bextends\s+\w+", content))
    metrics.implements_count = len(re.findall(r"\bimplements\s+[\w,\s]+", content))

    # Lambda and stream metrics
    metrics.lambda_count = len(re.findall(r"\([^)]*\)\s*->", content))
    # Also count single-arg lambdas without parentheses: x -> x + 1
    metrics.lambda_count += len(re.findall(r"\b[A-Za-z_]\w*\s*->", content))
    metrics.stream_operations = len(
        re.findall(r"\.\s*(?:stream|filter|map|reduce|collect|forEach)\s*\(", content)
    )

    # --- Maintainability index (clamped to [0, 100]) ---
    import math

    if metrics.code_lines > 0:
        # Adjusted for Java's verbosity: heavy inheritance and exception
        # handling slightly reduce the index.
        inheritance_factor = 1 - (metrics.extends_count + metrics.implements_count) * 0.05
        exception_factor = 1 - (metrics.try_blocks * 0.02)
        mi = (
            171
            - 5.2 * math.log(max(1, complexity))
            - 0.23 * complexity
            - 16.2 * math.log(metrics.code_lines)
            + 10 * inheritance_factor
            + 10 * exception_factor
        )
        metrics.maintainability_index = max(0, min(100, mi))

    return metrics