scanner
¶
Full name: tenets.utils.scanner
scanner¶
File scanning utilities.
This module provides functionality for discovering files in a codebase, respecting ignore patterns and filtering rules.
Classes¶
FileScanner¶
Scans directories for files matching criteria.
Initialize the scanner.
PARAMETER | DESCRIPTION |
---|---|
config | Tenets configuration TYPE: TenetsConfig |
Source code in tenets/utils/scanner.py
def __init__(self, config: TenetsConfig):
"""Initialize the scanner.

Stores the configuration, creates a module logger, resolves the worker
count through ``get_scanner_workers`` and assembles ``self.ignore_patterns``.

The ignore set starts as a copy of ``DEFAULT_IGNORE_PATTERNS`` and is then
extended with ``config.additional_ignore_patterns`` (when present and
non-empty), with minified-file patterns (when ``exclude_minified`` is
true) and with build-directory names.

Args:
config: Tenets configuration. The pattern-building lookups fall back
to built-in defaults when ``config`` is falsy or lacks the attribute.
"""
self.config = config
self.logger = get_logger(__name__)
# Resolve and log the multiprocessing configuration.
# The helpers are imported inside the method rather than at module top
# (presumably to avoid an import cycle or import-time cost; TODO confirm).
from tenets.utils.multiprocessing import get_scanner_workers, log_worker_info
# NOTE(review): config is passed through unguarded here, while the lines
# below tolerate a falsy config; confirm get_scanner_workers accepts None.
self.workers = get_scanner_workers(config)
# parallel_mode is only used for the log message below.
# NOTE(review): config.scanner is read without a getattr guard, so a truthy
# config lacking a `scanner` attribute would raise AttributeError here.
parallel_mode = getattr(config.scanner, "parallel_mode", "auto") if config else "auto"
log_worker_info(self.logger, "FileScanner", self.workers)
self.logger.info(f"FileScanner initialized (parallel_mode: {parallel_mode})")
# Build the ignore set: copy the class defaults so the update() calls below
# never mutate DEFAULT_IGNORE_PATTERNS itself.
self.ignore_patterns = set(self.DEFAULT_IGNORE_PATTERNS)
if (
config
and hasattr(config, "additional_ignore_patterns")
and config.additional_ignore_patterns
):
self.ignore_patterns.update(config.additional_ignore_patterns)
# Minified files are excluded unless the config sets exclude_minified to a
# falsy value; a missing attribute or falsy config means True.
self.exclude_minified = getattr(config, "exclude_minified", True) if config else True
if self.exclude_minified:
# Configured minified patterns replace the built-in list; they are not
# merged with it.
minified_patterns = getattr(config, "minified_patterns", []) if config else []
if minified_patterns:
self.ignore_patterns.update(minified_patterns)
else:
# No patterns configured (or an empty list): use the built-in defaults.
self.ignore_patterns.update(
[
"*.min.js",
"*.min.css",
"bundle.js",
"*.bundle.js",
"*.bundle.css",
"*.production.js",
"*.prod.js",
"vendor.prod.js",
"*.dist.js",
"*.compiled.js",
]
)
# Add build directory patterns.
# NOTE(review): this listing has lost its indentation, so it is not visible
# here whether this section is nested under `if self.exclude_minified:` or
# runs unconditionally; confirm against the real file.
build_dirs = getattr(config, "build_directory_patterns", []) if config else []
if build_dirs:
# Strip trailing slashes so entries are stored as bare directory names.
self.ignore_patterns.update(d.rstrip("/") for d in build_dirs)
else:
# Default build directories (without trailing slashes)
self.ignore_patterns.update(["dist", "build", "out", "output", "node_modules"])
Functions¶
scan¶
scan(paths: List[Path], include_patterns: Optional[List[str]] = None, exclude_patterns: Optional[List[str]] = None, follow_symlinks: bool = False, respect_gitignore: bool = True, max_file_size: Optional[int] = None) -> List[Path]
Scan paths for files matching criteria.
PARAMETER | DESCRIPTION |
---|---|
paths | Paths to scan (files or directories) |
include_patterns | Patterns of files to include (e.g., "*.py") |
exclude_patterns | Additional patterns to exclude |
follow_symlinks | Whether to follow symbolic links TYPE: bool DEFAULT: False |
respect_gitignore | Whether to respect .gitignore files TYPE: bool DEFAULT: True |
max_file_size | Maximum file size in bytes |
RETURNS | DESCRIPTION |
---|---|
List[Path] | List of file paths found |
Source code in tenets/utils/scanner.py
def scan(
    self,
    paths: List[Path],
    include_patterns: Optional[List[str]] = None,
    exclude_patterns: Optional[List[str]] = None,
    follow_symlinks: bool = False,
    respect_gitignore: bool = True,
    max_file_size: Optional[int] = None,
) -> List[Path]:
    """Collect the files reachable from ``paths`` that pass the filters.

    Each entry is handled according to what it is on disk: a file is
    checked with ``_should_include_file``; a directory is expanded with
    ``_scan_directory``; anything else (for example a path that does not
    exist) is skipped silently.

    Args:
        paths: Paths to scan (files or directories).
        include_patterns: Patterns of files to include (e.g., "*.py").
        exclude_patterns: Additional patterns to exclude.
        follow_symlinks: Whether to follow symbolic links. Only forwarded
            to directory scanning.
        respect_gitignore: Whether to respect .gitignore files. Only
            forwarded to directory scanning.
        max_file_size: Maximum file size in bytes.

    Returns:
        The matching paths with duplicates removed, in first-seen order.
    """
    collected = []
    for entry in paths:
        if entry.is_file():
            # A file named explicitly by the caller still goes through the filters.
            if self._should_include_file(
                entry, include_patterns, exclude_patterns, max_file_size
            ):
                collected.append(entry)
            continue

        if entry.is_dir():
            found_in_dir = self._scan_directory(
                entry,
                include_patterns,
                exclude_patterns,
                follow_symlinks,
                respect_gitignore,
                max_file_size,
            )
            collected.extend(found_in_dir)

    # dict keys keep first-insertion order, so this drops repeats without
    # reordering the remaining paths.
    unique_files = list(dict.fromkeys(collected))

    self.logger.info(f"Scanned {len(paths)} paths, found {len(unique_files)} files")
    return unique_files
find_files_by_name¶
Find files matching a name pattern.
PARAMETER | DESCRIPTION |
---|---|
root | Root directory to search TYPE: Path |
name_pattern | Pattern to match (supports wildcards) TYPE: str |
case_sensitive | Whether to match case-sensitively TYPE: bool DEFAULT: False |
RETURNS | DESCRIPTION |
---|---|
List[Path] | List of matching file paths |
Source code in tenets/utils/scanner.py
def find_files_by_name(
    self, root: Path, name_pattern: str, case_sensitive: bool = False
) -> List[Path]:
    """Find files whose name matches a wildcard pattern.

    Candidates come from ``self.scan([root])``, so the scanner's usual
    filtering applies. Only the final path component (``Path.name``) is
    matched against the pattern, never the directory part.

    Args:
        root: Root directory to search
        name_pattern: Pattern to match (supports ``fnmatch`` wildcards)
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of matching file paths, in the order ``scan`` produced them
    """
    if not case_sensitive:
        name_pattern = name_pattern.lower()

    matches = []
    for file_path in self.scan([root]):
        filename = file_path.name if case_sensitive else file_path.name.lower()
        # fnmatchcase never applies os.path.normcase, unlike fnmatch.fnmatch,
        # which folds case on Windows and would silently ignore
        # case_sensitive=True there. The insensitive path is unaffected
        # because both sides were lowercased above.
        if fnmatch.fnmatchcase(filename, name_pattern):
            matches.append(file_path)
    return matches
find_files_by_content¶
find_files_by_content(root: Path, content_pattern: str, file_patterns: Optional[List[str]] = None, case_sensitive: bool = False) -> List[Path]
Find files containing specific content.
PARAMETER | DESCRIPTION |
---|---|
root | Root directory to search TYPE: Path |
content_pattern | Text to search for TYPE: str |
file_patterns | File patterns to search in |
case_sensitive | Whether to match case-sensitively TYPE: bool DEFAULT: False |
RETURNS | DESCRIPTION |
---|---|
List[Path] | List of files containing the pattern |
Source code in tenets/utils/scanner.py
def find_files_by_content(
    self,
    root: Path,
    content_pattern: str,
    file_patterns: Optional[List[str]] = None,
    case_sensitive: bool = False,
) -> List[Path]:
    """Return the files under ``root`` whose text contains ``content_pattern``.

    Candidates come from ``self.scan([root], include_patterns=file_patterns)``.
    Each candidate is read in full as UTF-8 and tested with a plain substring
    check (no wildcards or regular expressions). Any file that raises while
    being opened, decoded or searched is skipped.

    Args:
        root: Root directory to search
        content_pattern: Text to search for
        file_patterns: File patterns to search in
        case_sensitive: Whether to match case-sensitively

    Returns:
        List of files containing the pattern, in scan order
    """
    needle = content_pattern if case_sensitive else content_pattern.lower()

    hits = []
    for candidate in self.scan([root], include_patterns=file_patterns):
        try:
            with open(candidate, encoding="utf-8") as handle:
                text = handle.read()
            haystack = text if case_sensitive else text.lower()
            if needle in haystack:
                hits.append(candidate)
        except Exception:
            # Best effort: unreadable or non-text files are simply skipped.
            continue
    return hits