""" Pytest configuration and shared fixtures for the comprehensive Crawailer test suite. This file provides shared fixtures, configuration, and utilities used across all test modules in the production-grade test suite. """ import asyncio import pytest import tempfile import sqlite3 import os from pathlib import Path from typing import Dict, Any, List, Optional from unittest.mock import AsyncMock, MagicMock import psutil import time import threading from crawailer import Browser, BrowserConfig from crawailer.content import WebContent # Pytest configuration def pytest_configure(config): """Configure pytest with custom markers and settings.""" config.addinivalue_line( "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')" ) config.addinivalue_line( "markers", "integration: marks tests as integration tests" ) config.addinivalue_line( "markers", "security: marks tests as security tests" ) config.addinivalue_line( "markers", "performance: marks tests as performance tests" ) config.addinivalue_line( "markers", "edge_case: marks tests as edge case tests" ) config.addinivalue_line( "markers", "regression: marks tests as regression tests" ) def pytest_collection_modifyitems(config, items): """Modify test collection to add markers and configure execution.""" # Add markers based on test file names and test names for item in items: # Mark tests based on file names if "performance" in item.fspath.basename: item.add_marker(pytest.mark.performance) item.add_marker(pytest.mark.slow) elif "security" in item.fspath.basename: item.add_marker(pytest.mark.security) elif "edge_cases" in item.fspath.basename: item.add_marker(pytest.mark.edge_case) elif "production" in item.fspath.basename: item.add_marker(pytest.mark.integration) item.add_marker(pytest.mark.slow) elif "regression" in item.fspath.basename: item.add_marker(pytest.mark.regression) # Mark tests based on test names if "stress" in item.name or "concurrent" in item.name: item.add_marker(pytest.mark.slow) if "timeout" in item.name or "large" in item.name: item.add_marker(pytest.mark.slow) # Shared fixtures @pytest.fixture def browser_config(): """Provide a standard browser configuration for tests.""" return BrowserConfig( headless=True, timeout=30000, viewport={"width": 1920, "height": 1080}, extra_args=["--no-sandbox", "--disable-dev-shm-usage"] ) @pytest.fixture async def mock_browser(): """Provide a fully configured mock browser instance.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock(return_value=AsyncMock(status=200)) mock_page.close = AsyncMock() mock_page.evaluate.return_value = "mock_result" mock_page.content.return_value = "Mock content" mock_page.title.return_value = "Mock Page" mock_browser_instance = AsyncMock() mock_browser_instance.new_page.return_value = mock_page browser._browser = mock_browser_instance browser._is_started = True yield browser @pytest.fixture async def mock_multiple_pages(): """Provide multiple mock pages for concurrent testing.""" pages = [] for i in range(10): mock_page = AsyncMock() mock_page.goto = AsyncMock(return_value=AsyncMock(status=200)) mock_page.close = AsyncMock() mock_page.evaluate.return_value = f"page_{i}_result" mock_page.content.return_value = f"Page {i} content" mock_page.title.return_value = f"Page {i}" pages.append(mock_page) return pages @pytest.fixture def temp_database(): """Provide a temporary SQLite database for testing.""" db_file = tempfile.NamedTemporaryFile(suffix='.db', delete=False) db_file.close() # Initialize database conn = sqlite3.connect(db_file.name) cursor = conn.cursor() # Create test tables cursor.execute(""" CREATE TABLE test_data ( id INTEGER PRIMARY KEY, url TEXT, content TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """) cursor.execute(""" CREATE TABLE execution_logs ( id INTEGER PRIMARY KEY, test_name TEXT, execution_time REAL, success BOOLEAN, error_message TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """) conn.commit() conn.close() yield db_file.name # Cleanup if os.path.exists(db_file.name): os.unlink(db_file.name) @pytest.fixture def temp_directory(): """Provide a temporary directory for file operations.""" with tempfile.TemporaryDirectory() as temp_dir: yield Path(temp_dir) @pytest.fixture def performance_monitor(): """Provide performance monitoring utilities.""" class PerformanceMonitor: def __init__(self): self.start_time = None self.end_time = None self.start_memory = None self.end_memory = None self.start_threads = None self.end_threads = None def start_monitoring(self): self.start_time = time.time() self.start_memory = psutil.virtual_memory().percent self.start_threads = threading.active_count() def stop_monitoring(self): self.end_time = time.time() self.end_memory = psutil.virtual_memory().percent self.end_threads = threading.active_count() @property def duration(self): if self.start_time and self.end_time: return self.end_time - self.start_time return 0 @property def memory_delta(self): if self.start_memory is not None and self.end_memory is not None: return self.end_memory - self.start_memory return 0 @property def thread_delta(self): if self.start_threads is not None and self.end_threads is not None: return self.end_threads - self.start_threads return 0 return PerformanceMonitor() @pytest.fixture def mock_html_pages(): """Provide mock HTML pages for testing various scenarios.""" return { "simple": """ Simple Page

Hello World

This is a simple test page.

""", "complex": """ Complex Page

Article Title

Article content with bold text.

""", "javascript_heavy": """ JS Heavy Page
Loading...
""", "forms": """ Form Page
""" } @pytest.fixture def mock_web_content(): """Provide mock WebContent objects for testing.""" def create_content(url="https://example.com", title="Test Page", content="Test content"): return WebContent( url=url, title=title, markdown=f"# {title}\n\n{content}", text=content, html=f"{title}

{content}

", word_count=len(content.split()), reading_time="1 min read" ) return create_content @pytest.fixture def error_injection(): """Provide utilities for error injection testing.""" class ErrorInjection: @staticmethod def network_error(): return Exception("Network connection failed") @staticmethod def timeout_error(): return asyncio.TimeoutError("Operation timed out") @staticmethod def javascript_error(): return Exception("JavaScript execution failed: ReferenceError: undefined is not defined") @staticmethod def security_error(): return Exception("Security policy violation: Cross-origin request blocked") @staticmethod def memory_error(): return Exception("Out of memory: Cannot allocate buffer") @staticmethod def syntax_error(): return Exception("SyntaxError: Unexpected token '{'") return ErrorInjection() @pytest.fixture def test_urls(): """Provide a set of test URLs for various scenarios.""" return { "valid": [ "https://example.com", "https://www.google.com", "https://github.com", "http://httpbin.org/get" ], "invalid": [ "not-a-url", "ftp://example.com", "javascript:alert('test')", "file:///etc/passwd" ], "problematic": [ "https://very-slow-site.example.com", "https://nonexistent-domain-12345.invalid", "https://self-signed.badssl.com", "http://localhost:99999" ] } @pytest.fixture(scope="session") def test_session_info(): """Provide session-wide test information.""" return { "start_time": time.time(), "python_version": ".".join(map(str, __import__("sys").version_info[:3])), "platform": __import__("platform").platform(), "test_environment": "pytest" } # Utility functions for tests def assert_performance_within_bounds(duration: float, max_duration: float, test_name: str = ""): """Assert that performance is within acceptable bounds.""" assert duration <= max_duration, f"{test_name} took {duration:.2f}s, expected <= {max_duration:.2f}s" def assert_memory_usage_reasonable(memory_delta: float, max_delta: float = 100.0, test_name: str = ""): """Assert that memory usage is reasonable.""" assert abs(memory_delta) <= max_delta, f"{test_name} memory delta {memory_delta:.1f}MB exceeds {max_delta}MB" def assert_no_resource_leaks(thread_delta: int, max_delta: int = 5, test_name: str = ""): """Assert that there are no significant resource leaks.""" assert abs(thread_delta) <= max_delta, f"{test_name} thread delta {thread_delta} exceeds {max_delta}" # Async test utilities async def wait_for_condition(condition_func, timeout: float = 5.0, interval: float = 0.1): """Wait for a condition to become true within a timeout.""" start_time = time.time() while time.time() - start_time < timeout: if await condition_func() if asyncio.iscoroutinefunction(condition_func) else condition_func(): return True await asyncio.sleep(interval) return False async def execute_with_timeout(coro, timeout: float): """Execute a coroutine with a timeout.""" try: return await asyncio.wait_for(coro, timeout=timeout) except asyncio.TimeoutError: raise asyncio.TimeoutError(f"Operation timed out after {timeout} seconds") # Test data generators def generate_test_scripts(count: int = 10): """Generate test JavaScript scripts.""" scripts = [] for i in range(count): scripts.append(f"return 'test_script_{i}_result'") return scripts def generate_large_data(size_mb: int = 1): """Generate large test data.""" return "x" * (size_mb * 1024 * 1024) def generate_unicode_test_strings(): """Generate Unicode test strings.""" return [ "Hello, 世界! 🌍", "Café résumé naïve", "Тест на русском языке", "اختبار باللغة العربية", "עברית בדיקה", "ひらがな カタカナ 漢字" ] # Custom assertions def assert_valid_web_content(content): """Assert that a WebContent object is valid.""" assert isinstance(content, WebContent) assert content.url assert content.title assert content.text assert content.html assert content.word_count >= 0 assert content.reading_time def assert_script_result_valid(result, expected_type=None): """Assert that a script execution result is valid.""" if expected_type: assert isinstance(result, expected_type) # Result should be JSON serializable import json try: json.dumps(result) except (TypeError, ValueError): pytest.fail(f"Script result {result} is not JSON serializable")