
ruby_analyzer

Full name: tenets.core.analysis.implementations.ruby_analyzer

ruby_analyzer

Ruby code analyzer.

This module provides comprehensive analysis for Ruby source files, including support for Ruby's dynamic features, metaprogramming, and DSLs.

Classes

RubyAnalyzer

Python
RubyAnalyzer()

Bases: LanguageAnalyzer

Ruby code analyzer.

Provides analysis for Ruby files including:

- Require and gem dependency analysis
- Class and module extraction with inheritance
- Method analysis with visibility and metaprogramming
- Block, proc, and lambda detection
- DSL pattern recognition
- Attribute accessors and metaprogramming
- Ruby-specific patterns (symbols, instance variables)
- Framework detection (Rails, Sinatra, RSpec)

Handles Ruby's dynamic nature and metaprogramming features.
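
A minimal usage sketch, based on the documented interface rather than taken from the project docs: RubyAnalyzer is imported from the full module path above and run against a Ruby file read from disk. The file path used here is hypothetical.

Python
from pathlib import Path

from tenets.core.analysis.implementations.ruby_analyzer import RubyAnalyzer

analyzer = RubyAnalyzer()
path = Path("app/models/user.rb")  # hypothetical Ruby file
source = path.read_text()

imports = analyzer.extract_imports(source, path)       # List[ImportInfo]
structure = analyzer.extract_structure(source, path)   # CodeStructure
metrics = analyzer.calculate_complexity(source, path)  # ComplexityMetrics

print(len(imports), len(structure.classes), metrics.cyclomatic)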

Initialize the Ruby analyzer with logger.

Source code in tenets/core/analysis/implementations/ruby_analyzer.py
Python
def __init__(self):
    """Initialize the Ruby analyzer with logger."""
    self.logger = get_logger(__name__)
Functions
extract_imports
Python
extract_imports(content: str, file_path: Path) -> List[ImportInfo]

Extract requires and gems from Ruby code.

Handles:

- require 'library'
- require_relative 'file'
- load 'file.rb'
- gem 'gemname'
- autoload :Module, 'file'
- Bundler.require
- conditional requires (require 'x' if ... / unless ...)
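
A hedged sketch of exercising these patterns, with a small illustrative Ruby snippet embedded as a Python string; it assumes ImportInfo exposes the module, type, and line values passed to its constructor in the source below.

Python
from pathlib import Path

from tenets.core.analysis.implementations.ruby_analyzer import RubyAnalyzer

ruby_source = """
require 'json'
require_relative 'helpers'
gem 'rails', '~> 7.0'
autoload :Parser, 'my_lib/parser'
"""

for imp in RubyAnalyzer().extract_imports(ruby_source, Path("example.rb")):
    # type is one of: require, require_relative, load, gem, autoload, ...
    print(imp.line, imp.type, imp.module)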

Source code in tenets/core/analysis/implementations/ruby_analyzer.py
Python
def extract_imports(self, content: str, file_path: Path) -> List[ImportInfo]:
    """Extract requires and gems from Ruby code.

    Handles:
    - require 'library'
    - require_relative 'file'
    - load 'file.rb'
    - gem 'gemname'
    - autoload :Module, 'file'
    - Bundler.require
    - conditional requires (require 'x' if ... / unless ...)
    """
    imports: List[ImportInfo] = []
    lines = content.splitlines()

    require_pattern = re.compile(r'^\s*require\s+["\']([^"\']+)["\']')
    require_relative_pattern = re.compile(r'^\s*require_relative\s+["\']([^"\']+)["\']')
    load_pattern = re.compile(r'^\s*load\s+["\']([^"\']+)["\']')
    gem_pattern = re.compile(r'^\s*gem\s+["\']([^"\']+)["\'](?:,\s*["\']([^"\']+)["\'])?')
    autoload_pattern = re.compile(r'^\s*autoload\s+:(\w+),\s*["\']([^"\']+)["\']')
    conditional_require_pattern = re.compile(
        r'^\s*require\s+["\']([^"\']+)["\']\s+(?:if|unless)\b'
    )

    for i, line in enumerate(lines, 1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Conditional requires first (covers also standard require pattern)
        m = conditional_require_pattern.match(line)
        if m:
            mod = m.group(1)
            imports.append(
                ImportInfo(module=mod, line=i, type="conditional_require", conditional=True)
            )
            continue

        m = require_pattern.match(line)
        if m:
            mod = m.group(1)
            is_stdlib = self._is_stdlib_module(mod)
            imports.append(
                ImportInfo(
                    module=mod,
                    line=i,
                    type="require",
                    is_stdlib=is_stdlib,
                    is_gem=not is_stdlib and not mod.startswith("."),
                )
            )
            continue

        m = require_relative_pattern.match(line)
        if m:
            imports.append(
                ImportInfo(
                    module=m.group(1),
                    line=i,
                    type="require_relative",
                    is_relative=True,
                    is_project_file=True,
                )
            )
            continue

        m = load_pattern.match(line)
        if m:
            mod = m.group(1)
            imports.append(
                ImportInfo(
                    module=mod,
                    line=i,
                    type="load",
                    is_relative=mod.startswith("."),
                    reloads=True,
                )
            )
            continue

        m = gem_pattern.match(line)
        if m:
            gem_name = m.group(1)
            version = m.group(2)
            imports.append(
                ImportInfo(module=gem_name, line=i, type="gem", version=version, is_gem=True)
            )
            continue

        m = autoload_pattern.match(line)
        if m:
            imports.append(
                ImportInfo(
                    module=m.group(2),
                    alias=m.group(1),
                    line=i,
                    type="autoload",
                    is_relative=m.group(2).startswith("."),
                    lazy_load=True,
                )
            )
            continue

        if "Bundler.require" in line:
            imports.append(
                ImportInfo(
                    module="Bundler", line=i, type="bundler_require", loads_all_gems=True
                )
            )

    if file_path.name == "Gemfile":
        imports.extend(self._extract_gemfile_dependencies(content))

    return imports
extract_exports
Python
extract_exports(content: str, file_path: Path) -> List[Dict[str, Any]]

Extract public methods and classes from Ruby code.

In Ruby, everything is public by default unless specified otherwise. Module and class definitions are the primary exports.

Parameters:

- content (str): Ruby source code
- file_path (Path): Path to the file being analyzed

Returns:

- List[Dict[str, Any]]: List of exported symbols with metadata
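
A hedged sketch of inspecting the returned dictionaries; the Ruby snippet is illustrative, and the keys used below are the ones built in the source that follows.

Python
from pathlib import Path

from tenets.core.analysis.implementations.ruby_analyzer import RubyAnalyzer

ruby_source = """
class Account < ApplicationRecord
  MAX_RETRIES = 3

  def active?
    true
  end
end
"""

exports = RubyAnalyzer().extract_exports(ruby_source, Path("account.rb"))
for item in exports:
    # every export dict carries at least "name", "type", and "line";
    # class entries also carry "superclass", method entries "context", etc.
    print(item["type"], item["name"], item.get("superclass"))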

Source code in tenets/core/analysis/implementations/ruby_analyzer.py
Python
def extract_exports(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
    """Extract public methods and classes from Ruby code.

    In Ruby, everything is public by default unless specified otherwise.
    Module and class definitions are the primary exports.

    Args:
        content: Ruby source code
        file_path: Path to the file being analyzed

    Returns:
        List of exported symbols with metadata
    """
    exports = []

    # Classes
    class_pattern = r"^\s*class\s+(\w+)(?:\s*<\s*([\w:]+))?"
    for match in re.finditer(class_pattern, content, re.MULTILINE):
        class_name = match.group(1)
        superclass = match.group(2) if match.group(2) else "Object"

        exports.append(
            {
                "name": class_name,
                "type": "class",
                "line": content[: match.start()].count("\n") + 1,
                "superclass": superclass,
                "is_exception": "Error" in superclass or "Exception" in superclass,
            }
        )

    # Modules
    module_pattern = r"^\s*module\s+(\w+)"
    for match in re.finditer(module_pattern, content, re.MULTILINE):
        exports.append(
            {
                "name": match.group(1),
                "type": "module",
                "line": content[: match.start()].count("\n") + 1,
            }
        )

    # Top-level methods (become private methods of Object)
    # Track visibility for methods
    visibility = "public"
    class_context = None
    module_context = None

    lines = content.split("\n")
    for i, line in enumerate(lines, 1):
        # Track class/module context
        class_match = re.match(r"^\s*class\s+(\w+)", line)
        if class_match:
            class_context = class_match.group(1)
            visibility = "public"  # Reset visibility in new class
            continue

        module_match = re.match(r"^\s*module\s+(\w+)", line)
        if module_match:
            module_context = module_match.group(1)
            visibility = "public"
            continue

        # Check for end of class/module
        if re.match(r"^\s*end\s*$", line):
            if class_context or module_context:
                class_context = None
                module_context = None
                visibility = "public"
            continue

        # Track visibility changes
        if re.match(r"^\s*private\s*$", line):
            visibility = "private"
            continue
        elif re.match(r"^\s*protected\s*$", line):
            visibility = "protected"
            continue
        elif re.match(r"^\s*public\s*$", line):
            visibility = "public"
            continue

        # Methods
        method_match = re.match(r"^\s*def\s+(?:self\.)?(\w+(?:\?|!|=)?)", line)
        if method_match and visibility == "public":
            method_name = method_match.group(1)
            context = class_context or module_context or "global"

            exports.append(
                {
                    "name": method_name,
                    "type": "method",
                    "line": i,
                    "context": context,
                    "is_class_method": "self." in line,
                    "is_predicate": method_name.endswith("?"),
                    "is_bang_method": method_name.endswith("!"),
                    "is_setter": method_name.endswith("="),
                }
            )

    # Constants (UPPERCASE identifiers)
    const_pattern = r"^\s*([A-Z][A-Z0-9_]*)\s*="
    for match in re.finditer(const_pattern, content, re.MULTILINE):
        exports.append(
            {
                "name": match.group(1),
                "type": "constant",
                "line": content[: match.start()].count("\n") + 1,
            }
        )

    return exports
extract_structure
Python
extract_structure(content: str, file_path: Path) -> CodeStructure

Extract code structure from Ruby file.

Extracts:

- Classes with inheritance and included modules
- Modules with included/extended modules
- Methods with visibility and type
- Instance and class variables
- Constants
- Blocks, procs, and lambdas
- Attribute accessors
- Aliases

Parameters:

- content (str): Ruby source code
- file_path (Path): Path to the file being analyzed

Returns:

- CodeStructure: CodeStructure object with extracted elements
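
A hedged sketch of walking the resulting CodeStructure; the Ruby snippet is illustrative, and it assumes ClassInfo exposes the name, bases, and included_modules values passed to its constructor in the source below.

Python
from pathlib import Path

from tenets.core.analysis.implementations.ruby_analyzer import RubyAnalyzer

ruby_source = """
module Billing
  class Invoice < ApplicationRecord
    include Comparable

    def total
      line_items.sum(&:amount)
    end
  end
end
"""

structure = RubyAnalyzer().extract_structure(ruby_source, Path("invoice.rb"))
for cls in structure.classes:
    print(cls.name, cls.bases, cls.included_modules)
print([m["name"] for m in structure.modules])
print(structure.framework, structure.is_test_file)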

Source code in tenets/core/analysis/implementations/ruby_analyzer.py
Python
def extract_structure(self, content: str, file_path: Path) -> CodeStructure:
    """Extract code structure from Ruby file.

    Extracts:
    - Classes with inheritance and included modules
    - Modules with included/extended modules
    - Methods with visibility and type
    - Instance and class variables
    - Constants
    - Blocks, procs, and lambdas
    - Attribute accessors
    - Aliases

    Args:
        content: Ruby source code
        file_path: Path to the file being analyzed

    Returns:
        CodeStructure object with extracted elements
    """
    structure = CodeStructure()

    # Extract classes with full information
    class_pattern = r"^\s*class\s+(\w+)(?:\s*<\s*([\w:]+))?"

    for match in re.finditer(class_pattern, content, re.MULTILINE):
        class_name = match.group(1)
        superclass = match.group(2)

        # Find class body
        class_start_line = content[: match.start()].count("\n") + 1
        class_body = self._extract_block_body(content, match.end(), "class")

        # Extract class components
        methods = []
        attributes = []
        included_modules = []
        extended_modules = []

        if class_body:
            methods = self._extract_methods(class_body)
            attributes = self._extract_attributes(class_body)
            included_modules = self._extract_included_modules(class_body)
            extended_modules = self._extract_extended_modules(class_body)

        class_info = ClassInfo(
            name=class_name,
            line=class_start_line,
            bases=[superclass] if superclass else [],
            methods=methods,
            attributes=attributes,
            included_modules=included_modules,
            extended_modules=extended_modules,
            is_singleton=False,
        )

        structure.classes.append(class_info)

    # Extract modules
    module_pattern = r"^\s*module\s+(\w+)"

    for match in re.finditer(module_pattern, content, re.MULTILINE):
        module_name = match.group(1)
        module_start_line = content[: match.start()].count("\n") + 1
        module_body = self._extract_block_body(content, match.end(), "module")

        methods = []
        included_modules = []
        extended_modules = []

        if module_body:
            methods = self._extract_methods(module_body)
            included_modules = self._extract_included_modules(module_body)
            extended_modules = self._extract_extended_modules(module_body)

        structure.modules.append(
            {
                "name": module_name,
                "line": module_start_line,
                "methods": methods,
                "included_modules": included_modules,
                "extended_modules": extended_modules,
            }
        )

    # Extract standalone methods (outside classes/modules)
    structure.functions = self._extract_toplevel_methods(content)

    # Extract constants
    const_pattern = r"^\s*([A-Z][A-Z0-9_]*)\s*="
    for match in re.finditer(const_pattern, content, re.MULTILINE):
        structure.constants.append(match.group(1))

    # Extract global variables
    global_var_pattern = r"\$\w+"
    global_vars = set(re.findall(global_var_pattern, content))
    structure.global_variables = list(global_vars)

    # Extract instance variables (class-level)
    ivar_pattern = r"@\w+"
    instance_vars = set(re.findall(ivar_pattern, content))
    structure.instance_variables = list(instance_vars)

    # Extract class variables
    cvar_pattern = r"@@\w+"
    class_vars = set(re.findall(cvar_pattern, content))
    structure.class_variables = list(class_vars)

    # Count blocks, procs, and lambdas
    structure.block_count = len(re.findall(r"\bdo\b|\{", content))
    structure.proc_count = len(re.findall(r"\bProc\.new\b|\bproc\b", content))
    structure.lambda_count = len(re.findall(r"\blambda\b|->|\bλ\b", content))

    # Detect Rails/framework patterns
    structure.framework = self._detect_framework(content, file_path)

    # Check for test file
    structure.is_test_file = (
        file_path.name.endswith("_test.rb")
        or file_path.name.endswith("_spec.rb")
        or file_path.parts
        and "test" in file_path.parts
        or file_path.parts
        and "spec" in file_path.parts
    )

    # Extract aliases
    alias_pattern = r"^\s*alias\s+:?(\w+)\s+:?(\w+)"
    for match in re.finditer(alias_pattern, content, re.MULTILINE):
        structure.aliases.append(
            {
                "new_name": match.group(1),
                "original_name": match.group(2),
                "line": content[: match.start()].count("\n") + 1,
            }
        )
    alias_method_pattern = r"^\s*alias_method\s+:?(\w+)\s*,\s+:?(\w+)"
    for match in re.finditer(alias_method_pattern, content, re.MULTILINE):
        structure.aliases.append(
            {
                "new_name": match.group(1),
                "original_name": match.group(2),
                "line": content[: match.start()].count("\n") + 1,
            }
        )

    # Detect singleton classes (class << self / class << obj)
    if re.search(r"^\s*class\s*<<\s*(self|\w+)", content, re.MULTILINE):
        # Mark any containing class as singleton if pattern appears inside it
        for c in structure.classes:
            # Rough check: if the singleton block appears after class start
            singleton_pos = re.search(r"^\s*class\s*<<\s*(self|\w+)", content, re.MULTILINE)
            if singleton_pos and content[: singleton_pos.start()].count("\n") + 1 >= c.line:
                try:
                    setattr(c, "is_singleton", True)
                except Exception:
                    pass

    return structure
calculate_complexity
Python
calculate_complexity(content: str, file_path: Path) -> ComplexityMetrics

Calculate complexity metrics for Ruby code.

Calculates:

- Cyclomatic complexity
- Cognitive complexity
- ABC metrics (Assignment, Branch, Condition)
- Method complexity
- Metaprogramming complexity

Parameters:

- content (str): Ruby source code
- file_path (Path): Path to the file being analyzed

Returns:

- ComplexityMetrics: ComplexityMetrics object with calculated metrics
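
A hedged sketch of the counting behaviour, derived from the source listed below rather than from separate documentation: cyclomatic complexity starts at 1 and gains a point for each decision keyword and each iterator call, so the illustrative snippet here should score 3 (base 1, plus the if, plus the .each).

Python
from pathlib import Path

from tenets.core.analysis.implementations.ruby_analyzer import RubyAnalyzer

ruby_source = """
def classify(items)
  items.each do |item|
    if item.valid?
      puts item
    end
  end
end
"""

metrics = RubyAnalyzer().calculate_complexity(ruby_source, Path("classify.rb"))
print(metrics.cyclomatic)  # expected: 3 (1 base + if + .each)
print(metrics.cognitive, metrics.max_depth)
print(metrics.maintainability_index)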

Source code in tenets/core/analysis/implementations/ruby_analyzer.py
Python
def calculate_complexity(self, content: str, file_path: Path) -> ComplexityMetrics:
    """Calculate complexity metrics for Ruby code.

    Calculates:
    - Cyclomatic complexity
    - Cognitive complexity
    - ABC metrics (Assignment, Branch, Condition)
    - Method complexity
    - Metaprogramming complexity

    Args:
        content: Ruby source code
        file_path: Path to the file being analyzed

    Returns:
        ComplexityMetrics object with calculated metrics
    """
    metrics = ComplexityMetrics()

    # Calculate cyclomatic complexity
    complexity = 1

    decision_keywords = [
        r"\bif\b",
        r"\bunless\b",
        r"\belsif\b",
        r"\belse\b",
        r"\bwhile\b",
        r"\buntil\b",
        r"\bfor\b",
        r"\bcase\b",
        r"\bwhen\b",
        r"\brescue\b",
        r"\b&&\b",
        r"\|\|",
        r"\band\b",
        r"\bor\b",
        r"\?.*:",  # Ternary operator
    ]

    for keyword in decision_keywords:
        complexity += len(re.findall(keyword, content))

    # Add complexity for iterators (they're essentially loops)
    iterator_methods = [
        r"\.each\b",
        r"\.map\b",
        r"\.select\b",
        r"\.reject\b",
        r"\.times\b",
        r"\.upto\b",
        r"\.downto\b",
    ]
    for iterator in iterator_methods:
        complexity += len(re.findall(iterator, content))

    metrics.cyclomatic = complexity

    # Calculate cognitive complexity
    cognitive = 0
    nesting_level = 0
    max_nesting = 0

    lines = content.split("\n")
    for line in lines:
        # Skip comments
        if line.strip().startswith("#"):
            continue

        # Track nesting
        if re.search(r"\b(if|unless|while|until|for|case|def|class|module|begin)\b", line):
            cognitive += 1 + nesting_level
            nesting_level += 1
            max_nesting = max(max_nesting, nesting_level)
        elif re.search(r"\belsif\b", line):
            cognitive += 1 + nesting_level
        elif re.search(r"\brescue\b", line):
            cognitive += 1 + nesting_level
        elif re.search(r"\bend\b", line):
            nesting_level = max(0, nesting_level - 1)

        # Blocks add complexity
        if re.search(r"\bdo\b\s*\|", line) or re.search(r"\{\s*\|", line):
            cognitive += 1

    metrics.cognitive = cognitive
    metrics.max_depth = max_nesting

    # Calculate ABC metrics
    abc_metrics = self._calculate_abc_metrics(content)
    metrics.abc_score = abc_metrics["score"]
    metrics.assignments = abc_metrics["assignments"]
    metrics.branches = abc_metrics["branches"]
    metrics.conditions = abc_metrics["conditions"]

    # Count code elements
    metrics.line_count = 0 if content == "" else len(lines)
    metrics.code_lines = self._count_code_lines(content)
    metrics.comment_lines = self._count_comment_lines(content)
    metrics.comment_ratio = (
        metrics.comment_lines / metrics.line_count if metrics.line_count > 0 else 0
    )

    # Count methods and classes
    metrics.method_count = len(re.findall(r"^\s*def\s+", content, re.MULTILINE))
    metrics.class_count = len(re.findall(r"^\s*class\s+", content, re.MULTILINE))
    metrics.module_count = len(re.findall(r"^\s*module\s+", content, re.MULTILINE))

    # Metaprogramming metrics
    metaprogramming_methods = [
        "define_method",
        "method_missing",
        "const_missing",
        "class_eval",
        "instance_eval",
        "module_eval",
        "send",
        "__send__",
        "public_send",
        "define_singleton_method",
        "singleton_class",
    ]

    metaprogramming_count = 0
    for method in metaprogramming_methods:
        metaprogramming_count += len(re.findall(rf"\b{method}\b", content))

    metrics.metaprogramming_score = metaprogramming_count

    # Block metrics
    metrics.block_count = len(re.findall(r"\bdo\b|\{", content))
    metrics.proc_count = len(re.findall(r"\bProc\.new\b|\bproc\b", content))
    metrics.lambda_count = len(re.findall(r"\blambda\b|->|\bλ\b", content))

    # Test metrics
    if "_test.rb" in file_path.name or "_spec.rb" in file_path.name:
        metrics.test_count = len(
            re.findall(r"\b(?:test|it|describe|context)\s+[\'\"]", content)
        )
        metrics.assertion_count = len(re.findall(r"\bassert\b", content))
        # Count RSpec expectations (expect/should) and include asserts as expectations for robustness
        metrics.expectation_count = (
            len(re.findall(r"\bexpect\b|\bshould\b", content)) + metrics.assertion_count
        )

    # Calculate maintainability index
    import math

    if metrics.code_lines > 0:
        # Adjusted for Ruby's expressiveness
        metaprogramming_factor = 1 - (metaprogramming_count * 0.02)
        abc_factor = 1 - (metrics.abc_score / 100) if metrics.abc_score < 100 else 0

        mi = (
            171
            - 5.2 * math.log(max(1, complexity))
            - 0.23 * complexity
            - 16.2 * math.log(metrics.code_lines)
            + 10 * metaprogramming_factor
            + 10 * abc_factor
        )
        metrics.maintainability_index = max(0, min(100, mi))

    return metrics

Functions