Skip to content

scanner

Full name: tenets.utils.scanner

scanner

File scanning utilities.

This module provides functionality for discovering files in a codebase, respecting ignore patterns and filtering rules.

Classes

FileScanner

Python
FileScanner(config: TenetsConfig)

Scans directories for files matching criteria.

Initialize the scanner.

PARAMETER | DESCRIPTION
config

Tenets configuration

TYPE:TenetsConfig

Source code in tenets/utils/scanner.py
Python
def __init__(self, config: TenetsConfig):
    """Set up the scanner and assemble its ignore-pattern set.

    The ignore set starts from ``DEFAULT_IGNORE_PATTERNS`` and is extended
    with any additional, minified-file and build-directory patterns found
    on ``config``. Each of those settings falls back to a built-in default
    when ``config`` is falsy or lacks the attribute.

    Args:
        config: Tenets configuration
    """
    self.config = config
    self.logger = get_logger(__name__)

    # Worker/multiprocessing setup is resolved and reported up front.
    from tenets.utils.multiprocessing import get_scanner_workers, log_worker_info

    self.workers = get_scanner_workers(config)
    if config:
        parallel_mode = getattr(config.scanner, "parallel_mode", "auto")
    else:
        parallel_mode = "auto"
    log_worker_info(self.logger, "FileScanner", self.workers)
    self.logger.info(f"FileScanner initialized (parallel_mode: {parallel_mode})")

    # Start from the class-level defaults, then layer configured extras on top.
    patterns = set(self.DEFAULT_IGNORE_PATTERNS)
    self.ignore_patterns = patterns

    extra_patterns = getattr(config, "additional_ignore_patterns", None) if config else None
    if extra_patterns:
        patterns.update(extra_patterns)

    # Minified files and build directories are excluded unless the
    # configuration explicitly turns ``exclude_minified`` off.
    self.exclude_minified = getattr(config, "exclude_minified", True) if config else True
    if self.exclude_minified:
        fallback_minified = (
            "*.min.js",
            "*.min.css",
            "bundle.js",
            "*.bundle.js",
            "*.bundle.css",
            "*.production.js",
            "*.prod.js",
            "vendor.prod.js",
            "*.dist.js",
            "*.compiled.js",
        )
        fallback_build_dirs = ("dist", "build", "out", "output", "node_modules")

        # Configured minified patterns replace the fallback list entirely.
        configured_minified = getattr(config, "minified_patterns", []) if config else []
        patterns.update(configured_minified or fallback_minified)

        # Directory patterns are stored without trailing slashes so they can
        # be compared against plain directory names.
        configured_dirs = getattr(config, "build_directory_patterns", []) if config else []
        patterns.update(
            (name.rstrip("/") for name in configured_dirs)
            if configured_dirs
            else fallback_build_dirs
        )
Functions
scan
Python
scan(paths: List[Path], include_patterns: Optional[List[str]] = None, exclude_patterns: Optional[List[str]] = None, follow_symlinks: bool = False, respect_gitignore: bool = True, max_file_size: Optional[int] = None) -> List[Path]

Scan paths for files matching criteria.

PARAMETER | DESCRIPTION
paths

Paths to scan (files or directories)

TYPE:List[Path]

include_patterns

Patterns of files to include (e.g., "*.py")

TYPE: Optional[List[str]] | DEFAULT: None

exclude_patterns

Additional patterns to exclude

TYPE: Optional[List[str]] | DEFAULT: None

follow_symlinks

Whether to follow symbolic links

TYPE: bool | DEFAULT: False

respect_gitignore

Whether to respect .gitignore files

TYPE: bool | DEFAULT: True

max_file_size

Maximum file size in bytes

TYPE: Optional[int] | DEFAULT: None

RETURNS | DESCRIPTION
List[Path]

List of file paths found

Source code in tenets/utils/scanner.py
Python
def scan(
    self,
    paths: List[Path],
    include_patterns: Optional[List[str]] = None,
    exclude_patterns: Optional[List[str]] = None,
    follow_symlinks: bool = False,
    respect_gitignore: bool = True,
    max_file_size: Optional[int] = None,
) -> List[Path]:
    """Collect the files reachable from ``paths`` that pass the filters.

    Entries that are files are checked individually; entries that are
    directories are handed to the directory scanner. Entries that are
    neither (for example, paths that do not exist) contribute nothing.

    Args:
        paths: Paths to scan (files or directories)
        include_patterns: Patterns of files to include (e.g., "*.py")
        exclude_patterns: Additional patterns to exclude
        follow_symlinks: Whether to follow symbolic links
        respect_gitignore: Whether to respect .gitignore files
        max_file_size: Maximum file size in bytes

    Returns:
        List of file paths found, de-duplicated, in first-seen order
    """
    collected: List[Path] = []

    for entry in paths:
        if entry.is_file():
            # An explicitly named file still has to pass the filters.
            accepted = self._should_include_file(
                entry, include_patterns, exclude_patterns, max_file_size
            )
            if accepted:
                collected.append(entry)
            continue

        if entry.is_dir():
            collected += self._scan_directory(
                entry,
                include_patterns,
                exclude_patterns,
                follow_symlinks,
                respect_gitignore,
                max_file_size,
            )

    # dict keys keep insertion order, so this drops repeats while leaving
    # the first occurrence of each path in place.
    unique_files = list(dict.fromkeys(collected))

    self.logger.info(f"Scanned {len(paths)} paths, found {len(unique_files)} files")
    return unique_files
find_files_by_name
Python
find_files_by_name(root: Path, name_pattern: str, case_sensitive: bool = False) -> List[Path]

Find files matching a name pattern.

PARAMETER | DESCRIPTION
root

Root directory to search

TYPE:Path

name_pattern

Pattern to match (supports wildcards)

TYPE:str

case_sensitive

Whether to match case-sensitively

TYPE: bool | DEFAULT: False

RETURNS | DESCRIPTION
List[Path]

List of matching file paths

Source code in tenets/utils/scanner.py
Python
def find_files_by_name(
    self, root: Path, name_pattern: str, case_sensitive: bool = False
) -> List[Path]:
    """Find files matching a name pattern.

    Only the final path component (the file name) is compared against the
    pattern; candidate files come from ``self.scan([root])``, so the
    scanner's usual ignore rules apply.

    Args:
        root: Root directory to search
        name_pattern: Pattern to match (supports shell-style wildcards)
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of matching file paths, in scan order
    """
    if not case_sensitive:
        name_pattern = name_pattern.lower()

    matches = []
    for file_path in self.scan([root]):
        filename = file_path.name if case_sensitive else file_path.name.lower()

        # fnmatchcase is used instead of fnmatch.fnmatch because the latter
        # normalises case on case-insensitive platforms, which would make
        # case_sensitive=True ineffective there. Case folding for the
        # insensitive mode is already done explicitly above.
        if fnmatch.fnmatchcase(filename, name_pattern):
            matches.append(file_path)

    return matches
find_files_by_content
Python
find_files_by_content(root: Path, content_pattern: str, file_patterns: Optional[List[str]] = None, case_sensitive: bool = False) -> List[Path]

Find files containing specific content.

PARAMETER | DESCRIPTION
root

Root directory to search

TYPE:Path

content_pattern

Text to search for

TYPE:str

file_patterns

File patterns to search in

TYPE: Optional[List[str]] | DEFAULT: None

case_sensitive

Whether to match case-sensitively

TYPE: bool | DEFAULT: False

RETURNS | DESCRIPTION
List[Path]

List of files containing the pattern

Source code in tenets/utils/scanner.py
Python
def find_files_by_content(
    self,
    root: Path,
    content_pattern: str,
    file_patterns: Optional[List[str]] = None,
    case_sensitive: bool = False,
) -> List[Path]:
    """Return the scanned files whose text contains ``content_pattern``.

    Candidates come from ``self.scan`` restricted to ``file_patterns``.
    Each one is read in full as UTF-8 and searched for the pattern as a
    plain substring (not a regular expression).

    Args:
        root: Root directory to search
        content_pattern: Text to search for
        file_patterns: File patterns to search in
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of files containing the pattern
    """
    # Fold the search text once, outside the per-file loop.
    needle = content_pattern if case_sensitive else content_pattern.lower()
    hits = []

    for candidate in self.scan([root], include_patterns=file_patterns):
        try:
            with open(candidate, encoding="utf-8") as handle:
                haystack = handle.read()
                if not case_sensitive:
                    haystack = haystack.lower()

                if needle in haystack:
                    hits.append(candidate)
        except Exception:
            # Best effort: anything that cannot be opened or decoded as
            # text is skipped rather than aborting the search.
            continue

    return hits

Functions