tigerstyle-life9/includes/class-file-scanner.php
Ryan Malloy e92b7f8700 Initial commit: TigerStyle Life9 v1.0.0
Because cats have 9 lives, but servers don't - so they need
backup-restore! Complete backup solution with S3/MinIO support.

- Full WordPress backup (files + database)
- S3 / MinIO / S3-compatible storage backends
- Scheduled automatic backups
- Disaster recovery / one-click restore
- Backup integrity validation
- Cat-themed admin interface

Includes build.sh and .distignore for WordPress-installable release ZIPs.
2026-05-27 14:32:00 -06:00

564 lines
17 KiB
PHP

<?php
/**
* File Scanner
*
* Secure file system scanning with path validation and exclusion patterns
*
* @package TigerStyleLife9
* @since 1.0.0
*/
// Exit if accessed directly
if (!defined('ABSPATH')) {
exit;
}
/**
* File scanner class
*
* @since 1.0.0
*/
class TigerStyle_Life9_File_Scanner {
/**
 * Security instance
 *
 * Every scan entry point in this class calls validate_path() on it
 * (with ABSPATH as the second argument) before touching a path.
 *
 * @var TigerStyle_Life9_Security
 */
private $security;
/**
 * Scan statistics
 *
 * Running totals for the current scan. Keys, as initialised by
 * reset_stats(): 'total_files', 'total_directories', 'total_size',
 * 'largest_file' (array with 'size' and 'path') and 'file_types'
 * (extension => number of files).
 *
 * @var array
 */
private $stats;
/**
 * Constructor
 *
 * Obtains the security helper through tigerstyle_life9()->get_security()
 * and initialises the scan statistics to their empty state.
 */
public function __construct() {
$this->security = tigerstyle_life9()->get_security();
$this->reset_stats();
}
/**
 * Scan directory and return file information
 *
 * Lists the entries below $path. Entries are skipped when they fail path
 * validation, when they are hidden (unless 'show_hidden' is set) or when
 * they match an exclude pattern. The scan statistics are reset before the
 * scan and updated for every entry that is returned.
 *
 * Supported options:
 *  - 'show_hidden'      (bool)  include hidden entries; default false
 *  - 'max_depth'        (int)   number of levels to descend, 1 = direct
 *                               children only; a value <= 0 leaves the
 *                               depth unlimited; default 1
 *  - 'include_stats'    (bool)  add MIME type and extension for files;
 *                               default true
 *  - 'exclude_patterns' (array) patterns passed to should_exclude();
 *                               default []
 *
 * @param string $path Directory path to scan
 * @param array $options Scan options
 * @return array Array of file information, directories first, then sorted by name (case-insensitive)
 * @throws Exception If the path fails validation, is not a readable directory, or the scan itself fails
 */
public function scan_directory($path, $options = []) {
    $defaults = [
        'show_hidden' => false,
        'max_depth' => 1,
        'include_stats' => true,
        'exclude_patterns' => []
    ];
    $options = array_merge($defaults, $options);

    // Validate path
    if (!$this->security->validate_path($path, ABSPATH)) {
        throw new Exception('Invalid or unsafe path');
    }
    if (!is_dir($path) || !is_readable($path)) {
        throw new Exception('Directory not found or not readable');
    }

    $files = [];
    $this->reset_stats();

    try {
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($path, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::SELF_FIRST
        );

        // The iterator counts depth from 0, the option counts from 1
        if ($options['max_depth'] > 0) {
            $iterator->setMaxDepth($options['max_depth'] - 1);
        }

        foreach ($iterator as $file) {
            $file_path = $file->getPathname();

            // Security check for each file
            if (!$this->security->validate_path($file_path, ABSPATH)) {
                continue;
            }

            // Skip hidden files if not requested
            if (!$options['show_hidden'] && $this->is_hidden_file($file_path)) {
                continue;
            }

            // Check exclude patterns
            if ($this->should_exclude($file_path, $options['exclude_patterns'])) {
                continue;
            }

            $file_info = $this->get_file_info($file, $options['include_stats']);
            if ($file_info) {
                $files[] = $file_info;
                $this->update_stats($file_info);
            }
        }
    } catch (Exception $e) {
        error_log('TigerStyle Life9: File scan error - ' . $e->getMessage());
        // Chain the original exception so its type and stack trace stay
        // available to whoever handles the wrapped one.
        throw new Exception('File scan failed: ' . $e->getMessage(), 0, $e);
    }

    // Sort files: directories first, then by name
    usort($files, function($a, $b) {
        if ($a['type'] !== $b['type']) {
            return $a['type'] === 'directory' ? -1 : 1;
        }
        return strcasecmp($a['name'], $b['name']);
    });

    return $files;
}
/**
 * Collect the files that belong in a backup
 *
 * Walks every entry of $config['include_paths']. Paths that fail
 * validation or do not exist are logged and skipped. A path that is a
 * regular file is added when should_include_file() accepts it; any other
 * existing path is scanned recursively. The scan statistics are reset at
 * the start of the call.
 *
 * Defaults applied to $config: 'include_paths' => [ABSPATH],
 * 'exclude_patterns' => [], 'follow_symlinks' => false,
 * 'max_file_size' => 1 GB, 'skip_empty_files' => false.
 *
 * @param array $config Scan configuration
 * @return array Array of files to backup
 */
public function scan_files($config = []) {
    $config = array_merge(
        [
            'include_paths' => [ABSPATH],
            'exclude_patterns' => [],
            'follow_symlinks' => false,
            'max_file_size' => 1024 * 1024 * 1024, // 1GB
            'skip_empty_files' => false
        ],
        $config
    );

    $this->reset_stats();
    $collected = [];

    foreach ($config['include_paths'] as $path) {
        // Reject anything the security layer does not accept
        if (!$this->security->validate_path($path, ABSPATH)) {
            error_log("TigerStyle Life9: Skipping invalid path: {$path}");
            continue;
        }

        if (!file_exists($path)) {
            error_log("TigerStyle Life9: Path does not exist: {$path}");
            continue;
        }

        if (!is_file($path)) {
            // Directory - recursive scan (it updates the statistics itself)
            foreach ($this->scan_directory_recursive($path, $config) as $entry) {
                $collected[] = $entry;
            }
            continue;
        }

        // Single file
        $entry = $this->get_file_info_from_path($path, true);
        if ($entry && $this->should_include_file($entry, $config)) {
            $collected[] = $entry;
            $this->update_stats($entry);
        }
    }

    return $collected;
}
/**
 * Recursively scan directory for backup
 *
 * Walks the whole tree below $path and returns every entry that passes
 * path validation, is not matched by $config['exclude_patterns'] and is
 * accepted by should_include_file(). Statistics are updated for each
 * returned entry. Symlinked directories are descended into only when
 * $config['follow_symlinks'] is true.
 *
 * A failure while iterating is logged and ends the scan; the entries
 * collected up to that point are still returned.
 *
 * @param string $path Directory path
 * @param array $config Scan configuration
 * @return array Array of files
 */
private function scan_directory_recursive($path, $config) {
    $files = [];

    try {
        $flags = RecursiveDirectoryIterator::SKIP_DOTS;
        // FOLLOW_SYMLINKS makes the iterator descend into symlinked
        // directories, so it must only be set when following is requested.
        if ($config['follow_symlinks']) {
            $flags |= RecursiveDirectoryIterator::FOLLOW_SYMLINKS;
        }

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($path, $flags),
            RecursiveIteratorIterator::SELF_FIRST
        );

        foreach ($iterator as $file) {
            $file_path = $file->getPathname();

            // Security validation
            if (!$this->security->validate_path($file_path, ABSPATH)) {
                continue;
            }

            // Check exclude patterns
            if ($this->should_exclude($file_path, $config['exclude_patterns'])) {
                continue;
            }

            $file_info = $this->get_file_info($file, true);
            if ($file_info && $this->should_include_file($file_info, $config)) {
                $files[] = $file_info;
                $this->update_stats($file_info);
            }
        }
    } catch (Exception $e) {
        error_log('TigerStyle Life9: Recursive scan error - ' . $e->getMessage());
    }

    return $files;
}
/**
 * Build the information record for one iterator entry
 *
 * The record always contains, in this order: 'name', 'path', 'type'
 * ('directory' or 'file'), 'size' (0 unless the entry is a regular file),
 * 'modified' (formatted 'Y-m-d H:i:s'), 'permissions' (last four octal
 * digits of the mode), 'readable' and 'writable'. For regular files
 * 'mime_type' and a lower-cased 'extension' are appended when
 * $include_stats is true.
 *
 * @param SplFileInfo $file File object
 * @param bool $include_stats Include file statistics
 * @return array|null File information or null if error
 */
private function get_file_info($file, $include_stats = true) {
    try {
        $pathname = $file->getPathname();

        $record = [
            'name' => $file->getFilename(),
            'path' => $pathname,
            'type' => 'file',
            'size' => 0,
        ];
        if ($file->isDir()) {
            $record['type'] = 'directory';
        }

        $is_regular_file = $file->isFile();
        if ($is_regular_file) {
            $record['size'] = $file->getSize();
        }

        $record['modified'] = date('Y-m-d H:i:s', $file->getMTime());

        // Keep only the permission digits of the octal mode
        $octal_mode = sprintf('%o', $file->getPerms());
        $record['permissions'] = substr($octal_mode, -4);

        $record['readable'] = $file->isReadable();
        $record['writable'] = $file->isWritable();

        if ($include_stats && $is_regular_file) {
            $record['mime_type'] = $this->get_mime_type($pathname);
            $record['extension'] = strtolower($file->getExtension());
        }

        return $record;
    } catch (Exception $e) {
        error_log('TigerStyle Life9: Get file info error - ' . $e->getMessage());
        return null;
    }
}
/**
 * Build the information record for a plain path
 *
 * Produces the same record layout as the iterator-based variant: 'name',
 * 'path', 'type', 'size', 'modified', 'permissions', 'readable',
 * 'writable', plus 'mime_type' and 'extension' for regular files when
 * $include_stats is true. A path that does not exist yields null.
 *
 * @param string $file_path File path
 * @param bool $include_stats Include file statistics
 * @return array|null File information or null if error
 */
private function get_file_info_from_path($file_path, $include_stats = true) {
    try {
        if (!file_exists($file_path)) {
            return null;
        }

        $record = [
            'name' => basename($file_path),
            'path' => $file_path,
            'type' => 'file',
            'size' => 0,
        ];
        if (is_dir($file_path)) {
            $record['type'] = 'directory';
        }
        if (is_file($file_path)) {
            $record['size'] = filesize($file_path);
        }

        $record['modified'] = date('Y-m-d H:i:s', filemtime($file_path));

        // Keep only the permission digits of the octal mode
        $octal_mode = sprintf('%o', fileperms($file_path));
        $record['permissions'] = substr($octal_mode, -4);

        $record['readable'] = is_readable($file_path);
        $record['writable'] = is_writable($file_path);

        if ($include_stats && is_file($file_path)) {
            $record['mime_type'] = $this->get_mime_type($file_path);
            $record['extension'] = strtolower(pathinfo($file_path, PATHINFO_EXTENSION));
        }

        return $record;
    } catch (Exception $e) {
        error_log('TigerStyle Life9: Get file info from path error - ' . $e->getMessage());
        return null;
    }
}
/**
 * Check if file should be excluded
 *
 * Each pattern is first checked with
 * TigerStyle_Life9_Validator::validate_exclude_pattern(); patterns that
 * fail are ignored. A pattern containing '*' or '?' is treated as a glob
 * and must match the whole ABSPATH-relative path or the whole file name.
 * Any other pattern excludes the file when it occurs anywhere in the
 * relative path or the file name.
 *
 * @param string $file_path File path
 * @param array $exclude_patterns Exclude patterns
 * @return bool True if file should be excluded
 */
private function should_exclude($file_path, $exclude_patterns) {
    if (empty($exclude_patterns)) {
        return false;
    }

    // Strip ABSPATH only when it is the leading prefix. Removing every
    // occurrence would corrupt paths that contain the same text further
    // down (and would delete all separators when ABSPATH is "/").
    $relative_path = $file_path;
    if (strpos($file_path, ABSPATH) === 0) {
        $relative_path = (string) substr($file_path, strlen(ABSPATH));
    }
    $filename = basename($file_path);

    // One validator is enough for all patterns of this call
    $validator = new TigerStyle_Life9_Validator();

    foreach ($exclude_patterns as $pattern) {
        // Validate pattern for security
        if (!$validator->validate_exclude_pattern($pattern)) {
            continue;
        }

        // Convert glob pattern to regex if needed
        if (strpos($pattern, '*') !== false || strpos($pattern, '?') !== false) {
            $regex_pattern = $this->glob_to_regex($pattern);
            if (preg_match($regex_pattern, $relative_path) || preg_match($regex_pattern, $filename)) {
                return true;
            }
        } else {
            // Exact match or substring match
            if (strpos($relative_path, $pattern) !== false || strpos($filename, $pattern) !== false) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Decide whether a scanned entry belongs in the backup
 *
 * An entry is rejected when it is an empty file and 'skip_empty_files' is
 * set, when its size exceeds 'max_file_size', when it is not readable, or
 * when its path contains one of the system/temporary markers below. The
 * markers are plain substring matches against the full path, so they
 * apply at any position in it.
 *
 * @param array $file_info File information
 * @param array $config Scan configuration
 * @return bool True if file should be included
 */
private function should_include_file($file_info, $config) {
    $size = $file_info['size'];

    // Skip empty files if configured
    $is_empty_file = $file_info['type'] === 'file' && $size === 0;
    if ($is_empty_file && $config['skip_empty_files']) {
        return false;
    }

    // Size limit and readability
    if ($size > $config['max_file_size'] || !$file_info['readable']) {
        return false;
    }

    // Skip system files and temporary files
    $blocked_fragments = ['/proc/', '/sys/', '/dev/', '/tmp/', '.tmp', '~$', '.swp', '.lock'];
    $path = $file_info['path'];
    foreach ($blocked_fragments as $fragment) {
        if (strpos($path, $fragment) !== false) {
            return false;
        }
    }

    return true;
}
/**
 * Check if file is hidden
 *
 * A file counts as hidden when its base name starts with a dot (the
 * special names '.' and '..' excluded).
 *
 * The Windows "hidden" attribute is deliberately not consulted:
 * fileperms() reports POSIX-style mode bits on every platform, so testing
 * bit 0x02 of its result checks "writable by others" rather than
 * FILE_ATTRIBUTE_HIDDEN and would flag writable files as hidden.
 *
 * @param string $file_path File path
 * @return bool True if file is hidden
 */
private function is_hidden_file($file_path) {
    $filename = basename($file_path);

    if ($filename === '.' || $filename === '..') {
        return false;
    }

    // Dot-file convention
    return strpos($filename, '.') === 0;
}
/**
 * Get MIME type of file
 *
 * Tries, in order: the fileinfo functions, mime_content_type(), and
 * finally a small extension lookup table. Unknown extensions yield
 * 'application/octet-stream'.
 *
 * @param string $file_path File path
 * @return string MIME type
 */
private function get_mime_type($file_path) {
    if (function_exists('finfo_file')) {
        $finfo = finfo_open(FILEINFO_MIME_TYPE);
        // finfo_open() returns false on failure; handing that to
        // finfo_file() would raise a TypeError on PHP 8, so fall through
        // to the next strategy instead.
        if ($finfo !== false) {
            $mime_type = finfo_file($finfo, $file_path);
            finfo_close($finfo);
            if ($mime_type) {
                return $mime_type;
            }
        }
    }

    if (function_exists('mime_content_type')) {
        $mime_type = mime_content_type($file_path);
        if ($mime_type) {
            return $mime_type;
        }
    }

    // Fallback based on extension
    $extension = strtolower(pathinfo($file_path, PATHINFO_EXTENSION));
    $mime_types = [
        'txt' => 'text/plain',
        'php' => 'application/x-php',
        'html' => 'text/html',
        'css' => 'text/css',
        'js' => 'application/javascript',
        'json' => 'application/json',
        'xml' => 'text/xml',
        'jpg' => 'image/jpeg',
        'jpeg' => 'image/jpeg',
        'png' => 'image/png',
        'gif' => 'image/gif',
        'pdf' => 'application/pdf',
        'zip' => 'application/zip'
    ];

    return $mime_types[$extension] ?? 'application/octet-stream';
}
/**
 * Translate a glob pattern into an anchored regular expression
 *
 * The pattern is escaped for use between '/' delimiters, then '*' becomes
 * '.*' and '?' becomes '.'. The result is anchored at both ends and
 * case-insensitive.
 *
 * @param string $pattern Glob pattern
 * @return string Regex pattern
 */
private function glob_to_regex($pattern) {
    $escaped = preg_quote($pattern, '/');

    // After escaping, the wildcards appear as \* and \?
    $body = str_replace(['\*', '\?'], ['.*', '.'], $escaped);

    return sprintf('/^%s$/i', $body);
}
/**
 * Put the scan statistics back into their empty state
 *
 * All counters become zero, the largest-file record is cleared and the
 * per-extension tally is emptied.
 */
private function reset_stats() {
    $no_largest_file = ['size' => 0, 'path' => ''];

    $this->stats = [
        'total_files' => 0,
        'total_directories' => 0,
        'total_size' => 0,
        'largest_file' => $no_largest_file,
        'file_types' => [],
    ];
}
/**
 * Fold one scanned entry into the statistics
 *
 * Anything whose type is not 'file' only increments the directory
 * counter. Files increment the file counter and the total size, may
 * replace the largest-file record, and are tallied per extension when
 * the record carries an 'extension' key.
 *
 * @param array $file_info File information
 */
private function update_stats($file_info) {
    if ($file_info['type'] !== 'file') {
        $this->stats['total_directories']++;
        return;
    }

    $size = $file_info['size'];
    $this->stats['total_files']++;
    $this->stats['total_size'] += $size;

    if ($size > $this->stats['largest_file']['size']) {
        $this->stats['largest_file'] = [
            'size' => $size,
            'path' => $file_info['path']
        ];
    }

    if (!isset($file_info['extension'])) {
        return;
    }

    $extension = $file_info['extension'];
    $seen_so_far = $this->stats['file_types'][$extension] ?? 0;
    $this->stats['file_types'][$extension] = $seen_so_far + 1;
}
/**
 * Get scan statistics
 *
 * Returns the statistics array as it currently stands. scan_directory()
 * and scan_files() reset it when they start, so it describes the most
 * recent of those calls.
 *
 * @return array Scan statistics
 */
public function get_stats() {
return $this->stats;
}
/**
 * Get disk usage for path
 *
 * Walks the whole tree below $path and totals the size of every regular
 * file, counting files and directories along the way. A failure while
 * iterating is logged and the figures gathered so far are returned.
 *
 * @param string $path Directory path
 * @return array Disk usage information with keys 'total_size', 'file_count', 'directory_count' and 'formatted_size'
 * @throws Exception If the path fails validation
 */
public function get_disk_usage($path) {
    if (!$this->security->validate_path($path, ABSPATH)) {
        throw new Exception('Invalid path');
    }

    $total_size = 0;
    $file_count = 0;
    $dir_count = 0;

    try {
        // SELF_FIRST is required for directories to be yielded at all; the
        // default LEAVES_ONLY mode only returns entries without children,
        // which leaves the directory count wrong.
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($path, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::SELF_FIRST
        );

        foreach ($iterator as $file) {
            if ($file->isFile()) {
                $total_size += $file->getSize();
                $file_count++;
            } elseif ($file->isDir()) {
                $dir_count++;
            }
        }
    } catch (Exception $e) {
        error_log('TigerStyle Life9: Disk usage calculation error - ' . $e->getMessage());
    }

    return [
        'total_size' => $total_size,
        'file_count' => $file_count,
        'directory_count' => $dir_count,
        'formatted_size' => $this->format_bytes($total_size)
    ];
}
/**
 * Format bytes to human readable format
 *
 * Divides by 1024 until the value drops below 1024 or the largest known
 * unit (TB) is reached, then rounds to two decimals. Values beyond the
 * TB range are therefore expressed in TB.
 *
 * @param int $bytes Number of bytes
 * @return string Formatted size
 */
private function format_bytes($bytes) {
    $units = ['B', 'KB', 'MB', 'GB', 'TB'];
    $last_unit = count($units) - 1;

    // Stop at the last unit so the index can never run past the table,
    // and use >= so that exactly 1024 rolls over to the next unit.
    for ($i = 0; $bytes >= 1024 && $i < $last_unit; $i++) {
        $bytes /= 1024;
    }

    return round($bytes, 2) . ' ' . $units[$i];
}
}