Skip to content

cpp_analyzer

Full name: tenets.core.analysis.implementations.cpp_analyzer

cpp_analyzer

C/C++ code analyzer.

This module provides comprehensive analysis for C and C++ source files, including headers, templates, and modern C++ features.

Classes

CppAnalyzer

Python
CppAnalyzer()

Bases: LanguageAnalyzer

C/C++ code analyzer.

Provides analysis for C and C++ files, including:

- Include directive analysis (system and local)
- Class, struct, and union extraction
- Template analysis
- Function and method extraction
- Namespace handling
- Macro and preprocessor directive analysis
- Modern C++ features (auto, lambdas, smart pointers)
- STL usage detection
- Memory management patterns

Supports both C and C++ with appropriate feature detection.

Initialize the C++ analyzer with logger.

Source code in tenets/core/analysis/implementations/cpp_analyzer.py
Python
def __init__(self):
    """Create the analyzer and attach a module-scoped logger.

    The logger is obtained from ``get_logger(__name__)`` and stored on
    ``self.logger``.
    """
    module_logger = get_logger(__name__)
    self.logger = module_logger
Functions
extract_imports
Python
extract_imports(content: str, file_path: Path) -> List[ImportInfo]

Extract includes from C/C++ code.

Handles:

- System includes: #include <iostream>
- Local includes: #include "myheader.h"
- Conditional includes with #ifdef
- Include guards

PARAMETER DESCRIPTION
content

C/C++ source code

TYPE:str

file_path

Path to the file being analyzed

TYPE:Path

RETURNS DESCRIPTION
List[ImportInfo]

List of ImportInfo objects representing includes

Source code in tenets/core/analysis/implementations/cpp_analyzer.py
Python
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Extract includes from C/C++ code.

    Handles:
    - System includes: #include <iostream>
    - Local includes: #include "myheader.h"
    - Conditional includes with #ifdef
    - Include guards

    The source is scanned line by line. Includes are recorded regardless of
    which conditional branch they sit in; an include is only *flagged* as
    ``conditional`` when it appears between an ``#if``/``#ifdef``/``#ifndef``
    and its matching ``#endif``.

    Args:
        content: C/C++ source code
        file_path: Path to the file being analyzed (not used by this method)

    Returns:
        List of ImportInfo objects representing includes
    """
    # Whitespace between '#' and the directive name is legal C, and the
    # include patterns below already accept it, so conditionals must too.
    conditional_re = re.compile(r"#\s*(ifdef|ifndef|if|elif|else|endif)\b")
    system_include_re = re.compile(r"^\s*#\s*include\s*<([^>]+)>")
    local_include_re = re.compile(r'^\s*#\s*include\s*"([^"]+)"')

    imports = []
    # Only the nesting depth is needed: an include is "conditional" whenever
    # at least one conditional block is open.
    open_conditionals = 0

    for line_no, line in enumerate(content.split("\n"), 1):
        directive = conditional_re.match(line.strip())
        if directive:
            kind = directive.group(1)
            if kind in ("ifdef", "ifndef", "if"):
                open_conditionals += 1
            elif kind == "endif" and open_conditionals > 0:
                # Ignore a stray #endif rather than going negative.
                open_conditionals -= 1
            # #else / #elif do not change the nesting depth.
            continue

        # System includes
        system_include = system_include_re.match(line)
        if system_include:
            header = system_include.group(1)
            imports.append(
                ImportInfo(
                    module=header,
                    line=line_no,
                    type="system",
                    is_relative=False,
                    is_stdlib=self._is_stdlib_header(header),
                    is_stl=self._is_stl_header(header),
                    conditional=open_conditionals > 0,
                )
            )
            continue

        # Local includes
        local_include = local_include_re.match(line)
        if local_include:
            header = local_include.group(1)
            imports.append(
                ImportInfo(
                    module=header,
                    line=line_no,
                    type="local",
                    is_relative=True,
                    is_project_header=True,
                    conditional=open_conditionals > 0,
                )
            )

    # Detect include guards
    # NOTE(review): an include-guard #ifndef also opens a conditional block,
    # so includes inside a guarded header are flagged conditional here;
    # presumably this helper accounts for that -- confirm.
    self._detect_include_guards(content, imports)

    return imports
extract_exports
Python
extract_exports(content: str, file_path: Path) -> List[Dict[str, Any]]

Extract exported symbols from C/C++ code.

In C/C++, symbols are exported by default unless static. For headers, we extract declarations. For source files, we extract non-static definitions.

PARAMETER DESCRIPTION
content

C/C++ source code

TYPE:str

file_path

Path to the file being analyzed

TYPE:Path

RETURNS DESCRIPTION
List[Dict[str, Any]]

List of exported symbols

Source code in tenets/core/analysis/implementations/cpp_analyzer.py
Python
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
    """Extract exported symbols from C/C++ code.

    In C/C++, symbols are exported by default unless static. Symbols are
    located with regular-expression heuristics (no real parsing), in this
    order: functions, classes/structs, enums, unions, typedefs, ``using``
    aliases and, for non-header files only, global variables.

    Args:
        content: C/C++ source code
        file_path: Path to the file being analyzed; its suffix decides
            whether the file is treated as a header

    Returns:
        List of exported symbols, one dict per symbol with at least
        ``name``, ``type``, ``line`` (1-based) and ``namespace``
    """

    def line_of(offset: int) -> int:
        # 1-based line number of a character offset, without copying the prefix.
        return content.count("\n", 0, offset) + 1

    exports = []
    # Compare case-insensitively so e.g. "FOO.H" / "Foo.HPP" count as headers.
    is_header = file_path.suffix.lower() in (".h", ".hh", ".hpp", ".hxx", ".h++")

    # Extract namespace if present
    namespace = self._extract_namespace(content)

    # Non-static functions
    func_pattern = r"^(?:template\s*<[^>]*>\s*)?(?!static)(?:(?:inline|extern|virtual|explicit|constexpr)\s+)*(?:[\w\s\*&:<>]+)\s+(\w+)\s*\([^)]*\)(?:\s*const)?(?:\s*noexcept)?(?:\s*override)?(?:\s*final)?(?:\s*=\s*0)?(?:\s*(?:\{|;))"
    # Statements such as "return foo(x);" look like declarations to the regex.
    non_function_names = (
        "if",
        "for",
        "while",
        "switch",
        "return",
        "delete",
        "new",
        "throw",
        "catch",
    )

    for match in re.finditer(func_pattern, content, re.MULTILINE):
        func_name = match.group(1)
        if func_name in non_function_names:
            continue

        signature = match.group(0)
        # Specifier keywords always precede the function name, so only look
        # there; otherwise a name like "inline_count" would set is_inline.
        specifiers = content[match.start() : match.start(1)]
        before_window = content[max(0, match.start() - 200) : match.start()]
        is_tmpl = (
            ("template" in signature)
            or ("template" in before_window)
            or self._is_template_function(content, match.start())
        )
        # A pure specifier is "= 0" (any spacing) right before the terminator;
        # anchoring at the end keeps default arguments like "int x = 0" out.
        is_pure_virtual = re.search(r"=\s*0\s*[{;]\Z", signature) is not None
        exports.append(
            {
                "name": func_name,
                "type": "function",
                "line": line_of(match.start()),
                "namespace": namespace,
                "is_inline": "inline" in specifiers,
                "is_virtual": "virtual" in specifiers,
                "is_pure_virtual": is_pure_virtual,
                "is_constexpr": "constexpr" in specifiers,
                "is_template": is_tmpl,
            }
        )

    # Classes and structs (public by default in struct)
    class_pattern = r"\b(?:struct|(?<!enum\s)class)\s+(?:__declspec\([^)]+\)\s+)?(\w+)(?:\s*:\s*(?:public|private|protected)\s+[\w:]+)?(?:\s*\{|;)"
    for match in re.finditer(class_pattern, content):
        class_name = match.group(1)
        # The match always begins with the keyword itself; testing the prefix
        # avoids misreading names that contain "struct" (e.g. "Constructor").
        is_struct = match.group(0).startswith("struct")
        kw_pos = match.start()

        exports.append(
            {
                "name": class_name,
                "type": "struct" if is_struct else "class",
                "line": line_of(match.start()),
                "namespace": namespace,
                "default_visibility": "public" if is_struct else "private",
                "is_template": self._is_template_class(content, kw_pos),
            }
        )

    # Enums
    enum_pattern = r"\benum\s+(?:class\s+)?(\w+)(?:\s*:\s*\w+)?(?:\s*\{|;)"

    for match in re.finditer(enum_pattern, content):
        enum_name = match.group(1)
        # Require whitespace after "class" so "enum class_type" stays a plain
        # enum, and tolerate arbitrary spacing between "enum" and "class".
        is_enum_class = re.match(r"enum\s+class\s", match.group(0)) is not None

        exports.append(
            {
                "name": enum_name,
                "type": "enum_class" if is_enum_class else "enum",
                "line": line_of(match.start()),
                "namespace": namespace,
            }
        )

    # Unions
    union_pattern = r"\bunion\s+(\w+)(?:\s*\{|;)"

    for match in re.finditer(union_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "union",
                "line": line_of(match.start()),
                "namespace": namespace,
            }
        )

    # Typedefs and using declarations
    typedef_pattern = r"\btypedef\s+.*?\s+(\w+)\s*;"

    for match in re.finditer(typedef_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "typedef",
                "line": line_of(match.start()),
                "namespace": namespace,
            }
        )

    using_pattern = r"\busing\s+(\w+)\s*="

    for match in re.finditer(using_pattern, content):
        exports.append(
            {
                "name": match.group(1),
                "type": "using_alias",
                "line": line_of(match.start()),
                "namespace": namespace,
            }
        )

    # Global variables (non-static); skipped entirely for header files.
    if not is_header:
        var_pattern = (
            r"^(?!static)(?:extern\s+)?(?:const\s+)?(?:[\w\s\*&:<>]+)\s+(\w+)\s*(?:=|;)"
        )
        non_variable_names = (
            "if",
            "for",
            "while",
            "return",
            "class",
            "struct",
            "enum",
            "typedef",
            "using",
        )

        for match in re.finditer(var_pattern, content, re.MULTILINE):
            var_name = match.group(1)
            if var_name in non_variable_names:
                continue

            # Qualifiers come before the variable name; ignoring the name
            # keeps e.g. "constant_count" from being reported as const.
            qualifiers = content[match.start() : match.start(1)]
            exports.append(
                {
                    "name": var_name,
                    "type": "variable",
                    "line": line_of(match.start()),
                    "namespace": namespace,
                    "is_const": "const" in qualifiers,
                    "is_extern": "extern" in qualifiers,
                }
            )

    return exports
extract_structure
Python
extract_structure(content: str, file_path: Path) -> CodeStructure

Extract code structure from C/C++ file.

Extracts:

- Namespaces
- Classes and structs with inheritance
- Functions and methods
- Templates
- Macros and preprocessor directives
- Global variables
- Operator overloads

PARAMETER DESCRIPTION
content

C/C++ source code

TYPE:str

file_path

Path to the file being analyzed

TYPE:Path

RETURNS DESCRIPTION
CodeStructure

CodeStructure object with extracted elements

Source code in tenets/core/analysis/implementations/cpp_analyzer.py
Python
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract code structure from C/C++ file.

    Extracts:
    - Namespaces
    - Classes and structs with inheritance
    - Functions and methods
    - Templates
    - Macros and preprocessor directives
    - Global variables
    - Operator overloads

    Everything is located with regular-expression heuristics rather than a
    real parser, so the result is approximate. STL usage, smart pointers and
    a lambda count are recorded on the returned structure as well.

    Args:
        content: C/C++ source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with extracted elements
    """

    def line_of(offset: int) -> int:
        # 1-based line number of a character offset, without copying the prefix.
        return content.count("\n", 0, offset) + 1

    structure = CodeStructure()

    # Determine if it's C or C++
    is_cpp = self._is_cpp_file(file_path, content)
    structure.language_variant = "C++" if is_cpp else "C"

    # Extract namespaces (C++ only)
    if is_cpp:
        namespace_pattern = r"namespace\s+(\w+)\s*\{"
        for match in re.finditer(namespace_pattern, content):
            structure.namespaces.append(
                {"name": match.group(1), "line": line_of(match.start())}
            )

    # Extract classes and structs. The keyword is captured in its own group so
    # struct-vs-class never depends on substrings of the name or template
    # parameters, and \b keeps words like "subclass" from matching.
    # NOTE(review): a template parameter such as "<class T>" still matches
    # this pattern as a class named "T" -- pre-existing, left unchanged.
    class_pattern = (
        r"(?:template\s*<[^>]+>\s*)?\b(?P<kw>struct|(?<!enum\s)class)\s+(?P<name>\w+)"
        r"(?:\s*:\s*(?P<bases>(?:public|private|protected)\s+[\w:]+"
        r"(?:\s*,\s*(?:public|private|protected)\s+[\w:]+)*))?"
    )

    for match in re.finditer(class_pattern, content):
        class_name = match.group("name")
        inheritance = match.group("bases")

        # Parse inheritance: drop the access specifier, keep the base name.
        bases = []
        if inheritance:
            for base in inheritance.split(","):
                base = re.sub(r"^(public|private|protected)\s+", "", base.strip())
                bases.append(base)

        # Find class body
        class_body = self._extract_class_body(content, match.end())

        # Extract methods and members
        methods = []
        fields = []
        if class_body:
            methods = self._extract_class_methods(class_body)
            fields = self._extract_class_fields(class_body)

        # Offset of the struct/class keyword itself (after any template
        # prefix), used for the template check.
        kw_pos = match.start("kw")
        class_info = ClassInfo(
            name=class_name,
            line=line_of(match.start()),
            bases=bases,
            methods=methods,
            fields=fields,
            is_struct=match.group("kw") == "struct",
            is_template=self._is_template_class(content, kw_pos),
        )

        structure.classes.append(class_info)

    # Extract standalone functions
    func_pattern = r"(?:template\s*<[^>]+>\s*)?(?:(?:inline|static|extern|virtual|explicit|constexpr)\s+)*(?:[\w\s\*&:<>]+)\s+(\w+)\s*\([^)]*\)(?:\s*const)?(?:\s*noexcept)?(?:\s*\{|;)"
    non_function_names = (
        "if",
        "for",
        "while",
        "switch",
        "return",
        "delete",
        "new",
        "throw",
        "catch",
    )

    for match in re.finditer(func_pattern, content, re.MULTILINE):
        func_name = match.group(1)

        # Filter out keywords and methods
        if func_name in non_function_names:
            continue

        # Check if it's inside a class (simple heuristic)
        if self._is_inside_class(content, match.start()):
            continue

        # Specifiers precede the function name; looking only there keeps the
        # name and parameter list (e.g. "get_static_data") out of the flags.
        specifiers = content[match.start() : match.start(1)]
        has_static = "static" in specifiers

        func_info = FunctionInfo(
            name=func_name,
            line=line_of(match.start()),
            is_static=has_static,
            is_inline="inline" in specifiers,
            is_constexpr="constexpr" in specifiers,
            is_template="template" in content[max(0, match.start() - 100) : match.start()],
            is_exported=not has_static,
        )

        structure.functions.append(func_info)

    # Extract templates
    template_pattern = r"template\s*<([^>]+)>\s*(?:class|struct|typename|function)\s+(\w+)"

    for match in re.finditer(template_pattern, content):
        structure.templates.append(
            {
                "name": match.group(2),
                "parameters": match.group(1),
                "line": line_of(match.start()),
            }
        )

    # Extract macros
    macro_pattern = r"^\s*#define\s+(\w+)(?:\([^)]*\))?"

    for match in re.finditer(macro_pattern, content, re.MULTILINE):
        structure.macros.append(
            {
                "name": match.group(1),
                "line": line_of(match.start()),
                # A parameter list directly after the name marks a function-like macro.
                "is_function_macro": "(" in match.group(0),
            }
        )

    # Extract global variables
    global_var_pattern = (
        r"^(?:static\s+)?(?:const\s+)?(?:[\w\s\*&:<>]+)\s+(\w+)\s*(?:=\s*[^;]+)?\s*;"
    )
    non_variable_names = (
        "if",
        "for",
        "while",
        "return",
        "class",
        "struct",
        "enum",
        "typedef",
    )

    for match in re.finditer(global_var_pattern, content, re.MULTILINE):
        var_name = match.group(1)

        # Filter out function declarations and keywords
        if var_name in non_variable_names:
            continue

        if not self._is_inside_class(content, match.start()) and not self._is_inside_function(
            content, match.start()
        ):
            # Qualifiers come before the name; the initializer is excluded so
            # "= static_cast<...>(...)" does not mark the variable static.
            qualifiers = content[match.start() : match.start(1)]
            structure.variables.append(
                {
                    "name": var_name,
                    "line": line_of(match.start()),
                    "type": "global",
                    "is_static": "static" in qualifiers,
                    "is_const": "const" in qualifiers,
                }
            )

    # Extract unions
    union_pattern = r"union\s+(\w+)\s*\{"

    for match in re.finditer(union_pattern, content):
        structure.unions.append({"name": match.group(1), "line": line_of(match.start())})

    # Extract operator overloads
    operator_pattern = r"operator\s*(?:[\+\-\*\/\%\^\&\|\~\!\=\<\>\[\]\(\)]|\+\+|\-\-|\<\<|\>\>|\=\=|\!\=|\<\=|\>\=|\&\&|\|\||\+\=|\-\=|\*\=|\/\=|\%\=|\^\=|\&\=|\|\=|\<\<\=|\>\>\=|,|->\*?|new|delete)(?:\s*\[\])?"

    structure.operator_overloads = len(re.findall(operator_pattern, content))

    # Detect STL usage (boolean for test compatibility)
    stl_types_found = self._detect_stl_usage(content)
    structure.uses_stl = bool(stl_types_found)
    structure.stl_types = stl_types_found  # Optionally keep the list for other uses

    # Detect smart pointers
    structure.smart_pointers = self._detect_smart_pointers(content)

    # Count lambda expressions
    lambda_pattern = r"\[[^\]]*\]\s*\([^)]*\)\s*(?:->[\w\s]+)?\s*\{"
    structure.lambda_count = len(re.findall(lambda_pattern, content))

    return structure
calculate_complexity
Python
calculate_complexity(content: str, file_path: Path) -> ComplexityMetrics

Calculate complexity metrics for C/C++ code.

Calculates:

- Cyclomatic complexity
- Cognitive complexity
- Preprocessor complexity
- Template complexity
- Memory management complexity

PARAMETER DESCRIPTION
content

C/C++ source code

TYPE:str

file_path

Path to the file being analyzed

TYPE:Path

RETURNS DESCRIPTION
ComplexityMetrics

ComplexityMetrics object with calculated metrics

Source code in tenets/core/analysis/implementations/cpp_analyzer.py
Python
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate complexity metrics for C/C++ code.

    Calculates:
    - Cyclomatic complexity
    - Cognitive complexity
    - Preprocessor complexity
    - Template complexity
    - Memory management complexity

    All figures come from regular-expression counts over the raw text, so
    keywords inside comments or string literals are counted too.

    Args:
        content: C/C++ source code
        file_path: Path to the file being analyzed (not used by this method)

    Returns:
        ComplexityMetrics object with calculated metrics
    """
    metrics = ComplexityMetrics()

    # Cyclomatic complexity: 1 plus one per decision-point occurrence.
    # NOTE(review): "else if" is matched by three patterns (if / else if /
    # else), so each one adds 3 -- kept as-is to leave existing scores stable.
    decision_keywords = [
        r"\bif\b",
        r"\belse\s+if\b",
        r"\belse\b",
        r"\bfor\b",
        r"\bwhile\b",
        r"\bdo\b",
        r"\bswitch\b",
        r"\bcase\b",
        r"\bcatch\b",
        # No \b here: '&' is not a word character, so r"\b&&\b" could only
        # match "a&&b" and silently skipped the usual "a && b".
        r"&&",
        r"\|\|",
        r"\?",
    ]

    complexity = 1 + sum(len(re.findall(keyword, content)) for keyword in decision_keywords)
    metrics.cyclomatic = complexity

    # Cognitive complexity: each control keyword on a line costs
    # weight * (1 + nesting beyond the first level).
    control_patterns = [
        (re.compile(r"\bif\b"), 1),
        (re.compile(r"\bfor\b"), 1),
        (re.compile(r"\bwhile\b"), 1),
        (re.compile(r"\bswitch\b"), 1),
        (re.compile(r"\btry\b"), 1),
        (re.compile(r"\bcatch\b"), 1),
    ]

    cognitive = 0
    nesting_level = 0
    max_nesting = 0

    lines = content.split("\n")
    for line in lines:
        # Skip comments and preprocessor directives
        if line.strip().startswith(("//", "/*", "#")):
            continue

        # Track nesting; braces on this line are applied before scoring it.
        nesting_level += line.count("{") - line.count("}")
        max_nesting = max(max_nesting, nesting_level)

        # Control structures with nesting penalty
        for pattern, weight in control_patterns:
            if pattern.search(line):
                cognitive += weight * (1 + max(0, nesting_level - 1))

    metrics.cognitive = cognitive
    metrics.max_depth = max_nesting

    # Count code elements
    metrics.line_count = len(lines)
    metrics.code_lines = self._count_code_lines(content)
    metrics.comment_lines = self._count_comment_lines(content)
    metrics.comment_ratio = (
        metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
    )

    # Count functions
    metrics.function_count = len(re.findall(r"[\w\s\*&:<>]+\s+\w+\s*\([^)]*\)\s*\{", content))

    # Count classes and structs
    metrics.class_count = len(re.findall(r"\b(?:class|struct)\s+\w+", content))

    # Template metrics
    metrics.template_count = len(re.findall(r"template\s*<", content))
    # Allow whitespace inside the empty parameter list ("template < >").
    metrics.template_specializations = len(re.findall(r"template\s*<\s*>", content))

    # Preprocessor metrics
    metrics.macro_count = len(re.findall(r"^\s*#define\s+", content, re.MULTILINE))
    metrics.ifdef_count = len(re.findall(r"^\s*#if(?:def|ndef)?\s+", content, re.MULTILINE))
    # Accept "#include<x>" (no space) and "#  include" like extract_imports does.
    metrics.include_count = len(re.findall(r"^\s*#\s*include\b", content, re.MULTILINE))

    # Memory management metrics
    metrics.new_count = len(re.findall(r"\bnew\s+", content))
    # Count delete and delete[]
    metrics.delete_count = len(re.findall(r"\bdelete\s*(?:\[\])?", content))
    metrics.malloc_count = len(re.findall(r"\bmalloc\s*\(", content))
    metrics.free_count = len(re.findall(r"\bfree\s*\(", content))

    # Smart pointer usage (count both types and factory helpers)
    metrics.unique_ptr_count = len(re.findall(r"\bunique_ptr\s*<", content)) + len(
        re.findall(r"(?:\b[\w:]+::)?make_unique(?:\s*<[^>]+>)?\s*\(", content)
    )
    metrics.shared_ptr_count = len(re.findall(r"\bshared_ptr\s*<", content)) + len(
        re.findall(r"(?:\b[\w:]+::)?make_shared(?:\s*<[^>]+>)?\s*\(", content)
    )
    metrics.weak_ptr_count = len(re.findall(r"\bweak_ptr\s*<", content))

    # RAII indicators
    metrics.uses_raii = (
        metrics.unique_ptr_count > 0 or metrics.shared_ptr_count > 0 or "RAII" in content
    )

    # Memory safety score: share of smart-pointer uses among all memory
    # management occurrences; 1.0 when there are none at all.
    manual_memory = (
        metrics.new_count + metrics.delete_count + metrics.malloc_count + metrics.free_count
    )
    smart_memory = metrics.unique_ptr_count + metrics.shared_ptr_count

    if manual_memory + smart_memory > 0:
        metrics.memory_safety_score = smart_memory / (manual_memory + smart_memory)
    else:
        metrics.memory_safety_score = 1.0

    # Calculate maintainability index (left at the ComplexityMetrics default
    # when there are no code lines, since log(0) is undefined).
    if metrics.code_lines > 0:
        # Adjusted for C++ complexity
        template_factor = 1 - (metrics.template_count * 0.02)
        memory_factor = metrics.memory_safety_score

        mi = (
            171
            - 5.2 * math.log(max(1, complexity))
            - 0.23 * complexity
            - 16.2 * math.log(metrics.code_lines)
            + 10 * template_factor
            + 15 * memory_factor
        )
        # Clamp to the 0-100 range.
        metrics.maintainability_index = max(0, min(100, mi))

    return metrics

Functions