php_analyzer
¶
Full name: tenets.core.analysis.implementations.php_analyzer
php_analyzer¶
PHP code analyzer.
This module provides comprehensive analysis for PHP source files, including support for modern PHP features, namespaces, and frameworks.
Classes¶
PhpAnalyzer¶
Bases: LanguageAnalyzer
PHP code analyzer.
Provides analysis for PHP files including:
- Include/require analysis with variations
- Namespace and use statement handling
- Class, trait, and interface extraction
- Function and method analysis with type hints
- Property analysis with visibility
- PHP 7+ features (typed properties, return types)
- PHP 8+ features (attributes, union types, enums)
- Framework detection (Laravel, Symfony, WordPress)
- Composer dependency analysis
Handles both procedural and object-oriented PHP code.
Initialize the PHP analyzer with logger.
Source code in tenets/core/analysis/implementations/php_analyzer.py
Functions¶
extract_imports¶
Extract imports from PHP code.
Handles:
- use statements (classes, functions, constants)
- include/require statements
- include_once/require_once
- Composer autoload
- Namespace imports
PARAMETER | DESCRIPTION |
---|---|
content | PHP source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[ImportInfo] | List of ImportInfo objects with import details |
Source code in tenets/core/analysis/implementations/php_analyzer.py
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Extract imports from PHP code.

    Handles:
    - use statements (classes, functions, constants)
    - group use statements (PHP 7+)
    - include/require statements, with or without parentheses
    - include_once/require_once, with or without parentheses
    - dynamic includes (variables, __DIR__/dirname(__FILE__), concatenation,
      interpolated double-quoted paths)
    - Composer autoload
    - Namespace tracking (each use statement records the namespace in effect)

    The scan is line-based: a statement spread over several lines is not
    recognised, and at most one static include is recorded per line.

    Args:
        content: PHP source code
        file_path: Path to the file being analyzed

    Returns:
        List of ImportInfo objects with import details
    """
    # Patterns are compiled once instead of on every line.
    namespace_re = re.compile(r"^\s*namespace\s+([\w\\]+)\s*;")
    use_re = re.compile(
        r"^\s*use\s+((?:function|const)\s+)?([\w\\]+)(?:\s+as\s+(\w+))?\s*;"
    )
    group_use_re = re.compile(r"^\s*use\s+([\w\\]+)\\{([^}]+)}\s*;")
    # One pattern covers include/require and their *_once forms, with or
    # without parentheses.  Groups: keyword, "_once" or None, opening quote,
    # path.
    static_include_re = re.compile(
        r"\b(include|require)(_once)?\s*\(?\s*(['\"])([^'\"]+)['\"]"
    )
    # Quoted literals are blanked before the dynamic checks so that the "."
    # of a file extension is not mistaken for the concatenation operator.
    string_literal_re = re.compile(r"'(?:[^'\\]|\\.)*'|\"(?:[^\"\\]|\\.)*\"")
    dynamic_var_re = re.compile(r"\b(?:include|require)(?:_once)?\s*\(?\s*\$\w+")
    dynamic_dir_re = re.compile(
        r"\b(?:include|require)(?:_once)?\s*\(?\s*"
        r"(?:__DIR__|dirname\s*\(\s*__FILE__\s*\))"
    )
    concat_re = re.compile(r"\b(?:include|require)(?:_once)?\b[^;]*\.[^;]*;")

    imports = []
    # Namespace most recently declared above the current line.
    current_namespace = None

    for line_no, line in enumerate(content.split("\n"), 1):
        # Skip comment lines (line comments and block-comment bodies).
        if line.strip().startswith(("//", "/*", "*")):
            continue

        # Namespace declaration
        namespace_match = namespace_re.match(line)
        if namespace_match:
            current_namespace = namespace_match.group(1)
            continue

        # Simple use statements
        use_match = use_re.match(line)
        if use_match:
            import_type = use_match.group(1).strip() if use_match.group(1) else "class"
            imports.append(
                ImportInfo(
                    module=use_match.group(2),
                    alias=use_match.group(3),
                    line=line_no,
                    type=f"use_{import_type}",
                    is_relative=False,
                    namespace=current_namespace,
                    import_type=import_type,
                )
            )
            continue

        # Group use statements (PHP 7+)
        group_match = group_use_re.match(line)
        if group_match:
            base_namespace = group_match.group(1)
            for item in group_match.group(2).split(","):
                item = item.strip()
                if not item:
                    # A trailing comma (allowed since PHP 7.2) leaves an
                    # empty entry; it is not an import.
                    continue
                if " as " in item:
                    name, alias = (part.strip() for part in item.split(" as ", 1))
                else:
                    name, alias = item, None
                imports.append(
                    ImportInfo(
                        module=f"{base_namespace}\\{name}",
                        alias=alias,
                        line=line_no,
                        type="use_group",
                        is_relative=False,
                        namespace=current_namespace,
                    )
                )
            continue

        # Static include/require with a literal path
        interpolated = False
        include_match = static_include_re.search(line)
        if include_match:
            keyword, once, quote, target = include_match.groups()
            imports.append(
                ImportInfo(
                    module=target,
                    line=line_no,
                    type=keyword + (once or ""),
                    is_relative=not target.startswith("/"),
                    is_file_include=True,
                )
            )
            # "$var" inside double quotes is interpolated at runtime, so the
            # effective path is not the literal text.
            interpolated = quote == '"' and "$" in target

        # Dynamic includes with variables or path expressions
        code_only = string_literal_re.sub("''", line)
        if (
            interpolated
            or dynamic_var_re.search(code_only)
            or dynamic_dir_re.search(code_only)
            or concat_re.search(code_only)
        ):
            imports.append(
                ImportInfo(
                    module="<dynamic>",
                    line=line_no,
                    type="dynamic_include",
                    is_relative=False,
                    is_dynamic=True,
                )
            )

        # Composer autoload
        if "vendor/autoload.php" in line:
            imports.append(
                ImportInfo(
                    module="composer_autoload",
                    line=line_no,
                    type="composer",
                    is_relative=False,
                    is_autoload=True,
                )
            )

    # Check for composer.json dependencies
    if file_path.name.lower() == "composer.json":
        imports.extend(self._extract_composer_dependencies(content))

    return imports
extract_exports¶
Extract public members from PHP code.
PHP doesn't have explicit exports, but public classes, functions, and constants are accessible from other files.
PARAMETER | DESCRIPTION |
---|---|
content | PHP source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
List[Dict[str, Any]] | List of exported (public) symbols |
Source code in tenets/core/analysis/implementations/php_analyzer.py
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
    """Extract public members from PHP code.

    PHP doesn't have explicit exports, but public classes, functions,
    and constants are accessible from other files.

    Results are grouped by kind in this order: classes, interfaces, traits,
    enums, global functions, ``const`` constants, ``define()`` constants.
    Every entry carries the first ``namespace`` declaration found in the
    file (or None).

    Args:
        content: PHP source code
        file_path: Path to the file being analyzed (not consulted here)

    Returns:
        List of exported (public) symbols, one dict per symbol
    """

    def line_at(offset: int) -> int:
        """Return the 1-based line number containing ``offset``."""
        return content.count("\n", 0, offset) + 1

    exports = []

    # Extract namespace
    namespace_match = re.search(r"^\s*namespace\s+([\w\\]+)\s*;", content, re.MULTILINE)
    namespace = namespace_match.group(1) if namespace_match else None

    # NOTE: the declaration patterns below begin with "(?:^|\n)\s*", so
    # match.start() points at the newline (and any blank lines) *before* the
    # declaration.  Line numbers are therefore taken from the position of the
    # captured name, which sits on the declaration line itself.

    # Public classes
    class_pattern = r"(?:^|\n)\s*(?:(abstract|final)\s+)?class\s+(\w+)(?:\s+extends\s+([\w\\]+))?(?:\s+implements\s+([\w\\,\s]+))?"
    for match in re.finditer(class_pattern, content):
        exports.append(
            {
                "name": match.group(2),
                "type": "class",
                "line": line_at(match.start(2)),
                "namespace": namespace,
                "modifiers": [match.group(1)] if match.group(1) else [],
                "extends": match.group(3),
                "implements": (
                    self._parse_implements_list(match.group(4)) if match.group(4) else []
                ),
            }
        )

    # Interfaces
    interface_pattern = r"(?:^|\n)\s*interface\s+(\w+)(?:\s+extends\s+([\w\\,\s]+))?"
    for match in re.finditer(interface_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "interface",
                "line": line_at(match.start(1)),
                "namespace": namespace,
                "extends": (
                    self._parse_implements_list(match.group(2)) if match.group(2) else []
                ),
            }
        )

    # Traits
    trait_pattern = r"(?:^|\n)\s*trait\s+(\w+)"
    for match in re.finditer(trait_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "trait",
                "line": line_at(match.start(1)),
                "namespace": namespace,
            }
        )

    # Enums (PHP 8.1+)
    enum_pattern = r"(?:^|\n)\s*enum\s+(\w+)(?:\s*:\s*(\w+))?"
    for match in re.finditer(enum_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "enum",
                "line": line_at(match.start(1)),
                "namespace": namespace,
                "backed_type": match.group(2),
            }
        )

    # Body ranges of class-like declarations, found by brace counting.
    # Enums are included because they may declare methods, which must not be
    # reported as global functions.  Braces inside strings or comments are
    # not distinguished from structural ones.
    container_ranges = []
    for match in re.finditer(r"(?:class|trait|interface|enum)\s+\w+[^{]*\{", content):
        body_start = match.end()
        depth = 1
        pos = body_start
        while pos < len(content) and depth > 0:
            char = content[pos]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            pos += 1
        container_ranges.append((body_start, pos))

    # Global functions: a `function name(` at the start of a line that does
    # not fall inside any class-like body.
    function_pattern = r"(?:^|\n)\s*function\s+(\w+)\s*\("
    for match in re.finditer(function_pattern, content):
        name_pos = match.start(1)
        if any(start <= name_pos < end for start, end in container_ranges):
            continue
        exports.append(
            {
                "name": match.group(1),
                "type": "function",
                "line": line_at(name_pos),
                "namespace": namespace,
            }
        )

    # Constants declared with `const`
    const_pattern = r"(?:^|\n)\s*const\s+(\w+)\s*="
    for match in re.finditer(const_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "constant",
                "line": line_at(match.start(1)),
                "namespace": namespace,
            }
        )

    # Constants declared with define('NAME', ...)
    define_pattern = r'define\s*\(\s*[\'"](\w+)[\'"]'
    for match in re.finditer(define_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "constant",
                "line": line_at(match.start()),
                "namespace": namespace,
                "defined_with": "define",
            }
        )

    return exports
extract_structure¶
Extract code structure from PHP file.
Extracts:
- Namespace declaration
- Classes with inheritance and traits
- Interfaces with extension
- Traits with composition
- Enums (PHP 8.1+)
- Functions with type hints
- Properties with visibility and types
- Methods with return types
- PHP attributes/annotations
PARAMETER | DESCRIPTION |
---|---|
content | PHP source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
CodeStructure | CodeStructure object with extracted elements |
Source code in tenets/core/analysis/implementations/php_analyzer.py
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract code structure from PHP file.

    Extracts:
    - Namespace declaration
    - Classes with inheritance and traits
    - Interfaces with extension
    - Traits with composition
    - Enums (PHP 8.1+)
    - Global functions
    - Global constants (``const`` and ``define()``)
    - ``$GLOBALS[...]`` names and superglobals referenced
    - Framework, test-file flag
    - Counts of closures, arrow functions and anonymous classes

    Member-level details (methods, properties, used traits, class constants,
    enum cases) are delegated to the analyzer's private ``_extract_*``
    helpers.

    Args:
        content: PHP source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with extracted elements
    """

    def line_at(offset: int) -> int:
        """Return the 1-based line number containing ``offset``."""
        return content.count("\n", 0, offset) + 1

    structure = CodeStructure()

    # Extract namespace (first declaration only)
    namespace_match = re.search(r"^\s*namespace\s+([\w\\]+)\s*;", content, re.MULTILINE)
    if namespace_match:
        structure.namespace = namespace_match.group(1)

    # NOTE: the declaration patterns below begin with "(?:^|\n)\s*", so
    # match.start() points at the newline (and any blank lines) *before* the
    # declaration.  Line numbers are therefore taken from the position of the
    # captured name, which sits on the declaration line itself.

    # Extract classes
    class_pattern = r"(?:^|\n)\s*(?:(abstract|final)\s+)?class\s+(\w+)(?:\s+extends\s+([\w\\]+))?(?:\s+implements\s+([\w\\,\s]+))?"
    for match in re.finditer(class_pattern, content):
        modifiers = [match.group(1)] if match.group(1) else []
        extends = match.group(3)
        implements = self._parse_implements_list(match.group(4)) if match.group(4) else []

        # Find class body, then pull its components out of it
        class_body = self._extract_block_body(content, match.end())
        methods = []
        properties = []
        traits_used = []
        constants = []
        if class_body:
            methods = self._extract_methods(class_body)
            properties = self._extract_properties(class_body)
            traits_used = self._extract_used_traits(class_body)
            constants = self._extract_class_constants(class_body)

        structure.classes.append(
            ClassInfo(
                name=match.group(2),
                line=line_at(match.start(2)),
                modifiers=modifiers,
                bases=[extends] if extends else [],
                interfaces=implements,
                methods=methods,
                properties=properties,
                traits_used=traits_used,
                constants=constants,
            )
        )

    # Extract interfaces
    interface_pattern = r"(?:^|\n)\s*interface\s+(\w+)(?:\s+extends\s+([\w\\,\s]+))?"
    for match in re.finditer(interface_pattern, content):
        extends = self._parse_implements_list(match.group(2)) if match.group(2) else []
        interface_body = self._extract_block_body(content, match.end())
        methods = self._extract_interface_methods(interface_body) if interface_body else []
        structure.interfaces.append(
            {
                "name": match.group(1),
                "line": line_at(match.start(1)),
                "extends": extends,
                "methods": methods,
            }
        )

    # Extract traits
    trait_pattern = r"(?:^|\n)\s*trait\s+(\w+)"
    for match in re.finditer(trait_pattern, content):
        trait_body = self._extract_block_body(content, match.end())
        methods = []
        properties = []
        traits_used = []
        if trait_body:
            methods = self._extract_methods(trait_body)
            properties = self._extract_properties(trait_body)
            traits_used = self._extract_used_traits(trait_body)
        structure.traits.append(
            {
                "name": match.group(1),
                "line": line_at(match.start(1)),
                "methods": methods,
                "properties": properties,
                "uses": traits_used,
            }
        )

    # Extract enums (PHP 8.1+)
    enum_pattern = r"(?:^|\n)\s*enum\s+(\w+)(?:\s*:\s*(\w+))?"
    for match in re.finditer(enum_pattern, content):
        enum_body = self._extract_block_body(content, match.end())
        cases = self._extract_enum_cases(enum_body) if enum_body else []
        structure.enums.append(
            {
                "name": match.group(1),
                "line": line_at(match.start(1)),
                "backed_type": match.group(2),
                "cases": cases,
            }
        )

    # Extract global functions
    structure.functions = self._extract_global_functions(content)

    # Extract global constants: `const NAME =` first, then define('NAME', ...)
    const_pattern = r"(?:^|\n)\s*const\s+(\w+)\s*="
    for match in re.finditer(const_pattern, content):
        structure.constants.append(match.group(1))
    define_pattern = r'define\s*\(\s*[\'"](\w+)[\'"]'
    for match in re.finditer(define_pattern, content):
        structure.constants.append(match.group(1))

    # Extract global variables referenced through $GLOBALS['name']
    global_var_pattern = r'\$GLOBALS\[[\'"](\w+)[\'"]\]'
    for match in re.finditer(global_var_pattern, content):
        structure.global_variables.append(f"${match.group(1)}")

    # Detect superglobals usage (plain substring test, in this fixed order)
    superglobals = [
        "$_GET",
        "$_POST",
        "$_SESSION",
        "$_COOKIE",
        "$_FILES",
        "$_SERVER",
        "$_ENV",
        "$_REQUEST",
    ]
    structure.superglobals_used = [sg for sg in superglobals if sg in content]

    # Detect framework
    structure.framework = self._detect_framework(content, file_path)

    # Check for test file.  The lowercase comparison already covers
    # "Test.php"; bool() guarantees a real boolean rather than leaking an
    # empty `parts` tuple out of an `or`/`and` chain.
    structure.is_test_file = bool(
        "test.php" in file_path.name.lower() or "tests" in file_path.parts
    )

    # Count anonymous functions/closures
    structure.closure_count = len(
        re.findall(r"function\s*\([^)]*\)\s*(?:use\s*\([^)]*\))?\s*\{", content)
    )

    # Count arrow functions (PHP 7.4+)
    structure.arrow_function_count = len(re.findall(r"fn\s*\([^)]*\)\s*=>", content))

    # Count anonymous classes
    structure.anonymous_classes_count = len(
        re.findall(
            r"new\s+class(?:\s*\([^)]*\))?\s*(?:extends\s+[\w\\]+)?\s*(?:implements\s+[\w\\,\s]+)?\s*\{",
            content,
        )
    )

    return structure
calculate_complexity¶
Calculate complexity metrics for PHP code.
Calculates:
- Cyclomatic complexity
- Cognitive complexity
- Nesting depth
- Class coupling
- PHP-specific metrics
PARAMETER | DESCRIPTION |
---|---|
content | PHP source code TYPE: str |
file_path | Path to the file being analyzed TYPE: Path |
RETURNS | DESCRIPTION |
---|---|
ComplexityMetrics | ComplexityMetrics object with calculated metrics |
Source code in tenets/core/analysis/implementations/php_analyzer.py
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate complexity metrics for PHP code.

    Calculates:
    - Cyclomatic complexity (1 + number of decision-point matches)
    - Cognitive complexity (control structures weighted by brace nesting)
    - Maximum nesting depth
    - Element counts (classes, interfaces, traits, enums, functions)
    - Exception-handling counts
    - PHP-specific metrics (globals, superglobals, eval, variable calls)
    - Type-hint and attribute counts
    - Maintainability index (only when the file has code lines)

    All counts are regex-based and run over the raw text, so keywords that
    appear inside strings or comments are counted as well.

    Args:
        content: PHP source code
        file_path: Path to the file being analyzed (not consulted here)

    Returns:
        ComplexityMetrics object with calculated metrics
    """
    import math

    metrics = ComplexityMetrics()

    # --- Cyclomatic complexity -------------------------------------------
    decision_patterns = [
        r"\bif\b",
        r"\belseif\b",
        r"\belse\b",
        r"\bwhile\b",
        r"\bfor\b",
        r"\bforeach\b",
        r"\bdo\b",
        r"\bswitch\b",
        r"\bcase\b",
        r"\bcatch\b",
        r"\bfinally\b",
        # Ternary "?" (including the short "?:" form).  Excluded: "??",
        # "?->", the "<?php" / "<?=" / "?>" tags, and "?Type" nullable hints.
        # A ternary written with no space after "?" before an identifier or
        # number is not recognised.
        r"(?<![?<])\?(?![?>\w\\]|->)",
        # No \b here: "&" is not a word character, so a word boundary would
        # demand identifiers touching the operator and miss "$a && $b".
        r"&&",
        r"\|\|",
        r"\band\b",
        r"\bor\b",
        r"\bxor\b",
        r"\?\?",  # Null coalescing operator
    ]
    complexity = 1 + sum(len(re.findall(pattern, content)) for pattern in decision_patterns)
    # Add complexity for match expressions (PHP 8+)
    complexity += len(re.findall(r"\bmatch\s*\(", content))
    metrics.cyclomatic = complexity

    # --- Cognitive complexity and nesting depth --------------------------
    # Each control keyword found on a line adds 1 plus a penalty that grows
    # with the brace depth in effect after that line's braces are applied.
    # `else` is intentionally absent: it carries no increment.
    control_patterns = [
        re.compile(r"\bif\b"),
        re.compile(r"\belseif\b"),
        re.compile(r"\bfor\b"),
        re.compile(r"\bforeach\b"),
        re.compile(r"\bwhile\b"),
        re.compile(r"\bswitch\b"),
        re.compile(r"\btry\b"),
        re.compile(r"\bcatch\b"),
        re.compile(r"\bmatch\b"),
    ]
    cognitive = 0
    nesting_level = 0
    max_nesting = 0
    lines = content.split("\n")
    for line in lines:
        # Skip comment lines (line comments and block-comment bodies)
        if line.strip().startswith(("//", "/*", "*")):
            continue

        # Track nesting
        nesting_level += line.count("{") - line.count("}")
        max_nesting = max(max_nesting, nesting_level)

        increment = 1 + max(0, nesting_level - 1)
        for pattern in control_patterns:
            if pattern.search(line):
                cognitive += increment

    metrics.cognitive = cognitive
    metrics.max_depth = max_nesting

    # --- Line counts ------------------------------------------------------
    metrics.line_count = len(lines)
    metrics.code_lines = self._count_code_lines(content)
    metrics.comment_lines = self._count_comment_lines(content)
    metrics.comment_ratio = (
        metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
    )

    # --- Element counts ---------------------------------------------------
    metrics.class_count = len(re.findall(r"\bclass\s+\w+", content))
    metrics.interface_count = len(re.findall(r"\binterface\s+\w+", content))
    metrics.trait_count = len(re.findall(r"\btrait\s+\w+", content))
    metrics.enum_count = len(re.findall(r"\benum\s+\w+", content))
    # Named functions and methods alike
    metrics.function_count = len(re.findall(r"\bfunction\s+\w+\s*\(", content))

    # --- Exception handling -----------------------------------------------
    metrics.try_blocks = len(re.findall(r"\btry\s*\{", content))
    metrics.catch_blocks = len(re.findall(r"\bcatch\s*\([^)]+\)", content))
    metrics.finally_blocks = len(re.findall(r"\bfinally\s*\{", content))
    metrics.throw_statements = len(re.findall(r"\bthrow\s+new\s+", content))

    # --- PHP-specific metrics ---------------------------------------------
    metrics.global_usage = len(re.findall(r"\$GLOBALS\[", content))
    metrics.superglobal_usage = len(
        re.findall(r"\$_(?:GET|POST|SESSION|COOKIE|FILES|SERVER|ENV|REQUEST)\[", content)
    )
    metrics.eval_usage = len(re.findall(r"\beval\s*\(", content))
    metrics.dynamic_calls = len(re.findall(r"\$\w+\s*\(", content))  # Variable functions

    # --- Type hint metrics ------------------------------------------------
    metrics.type_hints = len(
        re.findall(
            r":\s*(?:\?)?(?:int|string|bool|float|array|object|callable|iterable|mixed|void|self|parent|static|[\w\\]+)",
            content,
        )
    )
    metrics.nullable_types = len(
        re.findall(
            r"\?(?:int|string|bool|float|array|object|callable|iterable|mixed|[\w\\]+)", content
        )
    )
    metrics.union_types = len(re.findall(r":\s*[\w\\]+\|[\w\\]+", content))

    # --- Attribute/annotation metrics -------------------------------------
    metrics.attributes = len(re.findall(r"#\[[\w\\]+", content))
    metrics.doc_comments = len(re.findall(r"/\*\*", content))

    # --- Maintainability index --------------------------------------------
    # Left untouched when there are no code lines (log of 0 is undefined).
    if metrics.code_lines > 0:
        # Adjusted for PHP: global/superglobal access lowers the score,
        # type-hint coverage (capped at one hint per function) raises it.
        global_factor = 1 - (metrics.global_usage + metrics.superglobal_usage) * 0.01
        type_factor = (
            min(1.0, metrics.type_hints / metrics.function_count)
            if metrics.function_count > 0
            else 0
        )
        mi = (
            171
            - 5.2 * math.log(max(1, complexity))
            - 0.23 * complexity
            - 16.2 * math.log(metrics.code_lines)
            + 10 * global_factor
            + 10 * type_factor
        )
        metrics.maintainability_index = max(0, min(100, mi))

    return metrics