`scanner`¶

Full name: tenets.utils.scanner

scanner¶

File scanning utilities.

This module provides functionality for discovering files in a codebase, respecting ignore patterns and filtering rules.

Classes¶

FileScanner¶

Python

FileScanner(config: TenetsConfig)

Scans directories for files matching criteria.

Initialize the scanner.

PARAMETER	DESCRIPTION
`config`	Tenets configuration TYPE:`TenetsConfig`

Source code in tenets/utils/scanner.py

Python

def __init__(self, config: TenetsConfig):
    """Set up logging, worker settings and the ignore-pattern set.

    Args:
        config: Tenets configuration. The optional settings read directly
            in this method fall back to built-in defaults when ``config``
            is falsy or lacks the attribute.
    """
    self.config = config
    self.logger = get_logger(__name__)

    # Resolve and report the multiprocessing setup for this scanner.
    from tenets.utils.multiprocessing import get_scanner_workers, log_worker_info

    self.workers = get_scanner_workers(config)
    if config:
        parallel_mode = getattr(config.scanner, "parallel_mode", "auto")
    else:
        parallel_mode = "auto"
    log_worker_info(self.logger, "FileScanner", self.workers)
    self.logger.info(f"FileScanner initialized (parallel_mode: {parallel_mode})")

    # Start from the class defaults, then layer user-supplied patterns on top.
    patterns = self.ignore_patterns = set(self.DEFAULT_IGNORE_PATTERNS)
    extra_patterns = getattr(config, "additional_ignore_patterns", None) if config else None
    if extra_patterns:
        patterns.update(extra_patterns)

    # Minified/build exclusions are on unless the config turns them off.
    self.exclude_minified = getattr(config, "exclude_minified", True) if config else True
    if not self.exclude_minified:
        return

    # Minified file patterns: configured values win, otherwise use defaults.
    default_minified = [
        "*.min.js",
        "*.min.css",
        "bundle.js",
        "*.bundle.js",
        "*.bundle.css",
        "*.production.js",
        "*.prod.js",
        "vendor.prod.js",
        "*.dist.js",
        "*.compiled.js",
    ]
    configured_minified = getattr(config, "minified_patterns", []) if config else []
    patterns.update(configured_minified or default_minified)

    # Build directories are matched by bare name, so trailing slashes on
    # configured entries are stripped; the defaults already have none.
    configured_build_dirs = (
        getattr(config, "build_directory_patterns", []) if config else []
    )
    if configured_build_dirs:
        patterns.update(entry.rstrip("/") for entry in configured_build_dirs)
    else:
        patterns.update(["dist", "build", "out", "output", "node_modules"])

Functions¶

scan¶

Python

scan(paths: List[Path], include_patterns: Optional[List[str]] = None, exclude_patterns: Optional[List[str]] = None, follow_symlinks: bool = False, respect_gitignore: bool = True, max_file_size: Optional[int] = None) -> List[Path]

Scan paths for files matching criteria.

PARAMETER	DESCRIPTION
`paths`	Paths to scan (files or directories) TYPE:`List[Path]`
`include_patterns`	Patterns of files to include (e.g., "*.py") TYPE:`Optional[List[str]]`DEFAULT:`None`
`exclude_patterns`	Additional patterns to exclude TYPE:`Optional[List[str]]`DEFAULT:`None`
`follow_symlinks`	Whether to follow symbolic links TYPE:`bool`DEFAULT:`False`
`respect_gitignore`	Whether to respect .gitignore files TYPE:`bool`DEFAULT:`True`
`max_file_size`	Maximum file size in bytes TYPE:`Optional[int]`DEFAULT:`None`

RETURNS	DESCRIPTION
`List[Path]`	List of file paths found

Source code in tenets/utils/scanner.py

Python

def scan(
    self,
    paths: List[Path],
    include_patterns: Optional[List[str]] = None,
    exclude_patterns: Optional[List[str]] = None,
    follow_symlinks: bool = False,
    respect_gitignore: bool = True,
    max_file_size: Optional[int] = None,
) -> List[Path]:
    """Collect the files under ``paths`` that pass the given filters.

    Entries that are files are checked individually; entries that are
    directories are handed to the directory scanner. Entries that are
    neither (e.g. a path that does not exist) contribute nothing.

    Args:
        paths: Paths to scan (files or directories)
        include_patterns: Patterns of files to include (e.g., "*.py")
        exclude_patterns: Additional patterns to exclude
        follow_symlinks: Whether to follow symbolic links
        respect_gitignore: Whether to respect .gitignore files
        max_file_size: Maximum file size in bytes

    Returns:
        De-duplicated list of file paths, in first-seen order
    """
    collected = []

    for entry in paths:
        if entry.is_file():
            # Explicitly named file: only the per-file filter applies.
            accepted = self._should_include_file(
                entry, include_patterns, exclude_patterns, max_file_size
            )
            if accepted:
                collected.append(entry)
            continue

        if entry.is_dir():
            found_in_dir = self._scan_directory(
                entry,
                include_patterns,
                exclude_patterns,
                follow_symlinks,
                respect_gitignore,
                max_file_size,
            )
            collected.extend(found_in_dir)

    # dict keys keep insertion order, so this drops repeats while
    # preserving the position of each path's first occurrence.
    unique_files = list(dict.fromkeys(collected))

    self.logger.info(f"Scanned {len(paths)} paths, found {len(unique_files)} files")
    return unique_files

find_files_by_name¶

Python

find_files_by_name(root: Path, name_pattern: str, case_sensitive: bool = False) -> List[Path]

Find files matching a name pattern.

PARAMETER	DESCRIPTION
`root`	Root directory to search TYPE:`Path`
`name_pattern`	Pattern to match (supports wildcards) TYPE:`str`
`case_sensitive`	Whether to match case-sensitively TYPE:`bool`DEFAULT:`False`

RETURNS	DESCRIPTION
`List[Path]`	List of matching file paths

Source code in tenets/utils/scanner.py

Python

def find_files_by_name(
    self, root: Path, name_pattern: str, case_sensitive: bool = False
) -> List[Path]:
    """Find files whose name matches a shell-style wildcard pattern.

    Candidates are the files returned by ``self.scan([root])``; only the
    final path component (``Path.name``) is compared against the pattern.

    Args:
        root: Root directory to search
        name_pattern: Pattern to match (supports wildcards)
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of matching file paths, in the order ``scan`` produced them
    """
    if not case_sensitive:
        name_pattern = name_pattern.lower()

    matches = []
    for file_path in self.scan([root]):
        filename = file_path.name if case_sensitive else file_path.name.lower()

        # fnmatchcase never applies os.path.normcase, unlike fnmatch.fnmatch,
        # which folds case on case-insensitive platforms and would make
        # case_sensitive=True a no-op there. Case-insensitive matching is
        # handled by the explicit lower() calls above instead.
        if fnmatch.fnmatchcase(filename, name_pattern):
            matches.append(file_path)

    return matches

find_files_by_content¶

Python

find_files_by_content(root: Path, content_pattern: str, file_patterns: Optional[List[str]] = None, case_sensitive: bool = False) -> List[Path]

Find files containing specific content.

PARAMETER	DESCRIPTION
`root`	Root directory to search TYPE:`Path`
`content_pattern`	Text to search for TYPE:`str`
`file_patterns`	File patterns to search in TYPE:`Optional[List[str]]`DEFAULT:`None`
`case_sensitive`	Whether to match case-sensitively TYPE:`bool`DEFAULT:`False`

RETURNS	DESCRIPTION
`List[Path]`	List of files containing the pattern

Source code in tenets/utils/scanner.py

Python

def find_files_by_content(
    self,
    root: Path,
    content_pattern: str,
    file_patterns: Optional[List[str]] = None,
    case_sensitive: bool = False,
) -> List[Path]:
    """Find files whose text contains a given substring.

    Candidates come from ``self.scan([root], include_patterns=file_patterns)``.
    Each one is read in full as UTF-8 and searched with a plain substring
    test (no regex or wildcard handling).

    Args:
        root: Root directory to search
        content_pattern: Text to search for
        file_patterns: File patterns to search in
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of files containing the pattern
    """
    # Normalise the search text once rather than per file.
    needle = content_pattern if case_sensitive else content_pattern.lower()
    found = []

    for candidate in self.scan([root], include_patterns=file_patterns):
        try:
            with open(candidate, encoding="utf-8") as handle:
                text = handle.read()
            haystack = text if case_sensitive else text.lower()
            if needle in haystack:
                found.append(candidate)
        except Exception:
            # Best effort: anything that cannot be opened or decoded as
            # text is skipped instead of aborting the whole search.
            continue

    return found