"""
Memory Management and Leak Detection Tests

Tests for memory usage patterns, leak detection, and resource cleanup
in browser automation scenarios. Critical for production deployments
that need to handle long-running operations without memory bloat.

Test Categories:
- Memory baseline and growth patterns
- DOM node accumulation and cleanup
- JavaScript heap management
- Event listener leak detection
- Resource cleanup validation
- Long-running session stability
- Memory pressure handling
- Garbage collection effectiveness
"""

import pytest
import asyncio
import gc
import psutil
import os
from unittest.mock import Mock, patch, AsyncMock
from typing import List, Dict, Any

from crawailer import get, get_many, discover
from crawailer.browser import Browser
from crawailer.config import BrowserConfig


class MockMemoryProfiler:
    """Mock memory profiler for testing memory patterns.

    Tracks a simulated byte count (``current``) that never drops below
    ``baseline``, the highest value seen (``peak``), and every allocation
    size requested (``allocations``).
    """

    def __init__(self) -> None:
        self.baseline: int = 50_000_000  # 50MB baseline
        self.current: int = self.baseline
        self.peak: int = self.baseline
        self.allocations: List[int] = []

    def get_memory_usage(self) -> int:
        """Get current memory usage in bytes."""
        return self.current

    def allocate(self, size: int) -> None:
        """Simulate memory allocation of ``size`` bytes and update the peak."""
        self.current += size
        self.peak = max(self.peak, self.current)
        self.allocations.append(size)

    def deallocate(self, size: int) -> None:
        """Simulate memory deallocation, clamped so usage stays >= baseline."""
        self.current = max(self.baseline, self.current - size)

    def trigger_gc(self) -> None:
        """Simulate garbage collection.

        Reclaims 70% of the memory held above the baseline; does nothing
        when usage is already at the baseline.
        """
        excess = self.current - self.baseline
        if excess > 0:
            self.current -= int(excess * 0.7)


class MockBrowserMemory:
    """Mock browser memory tracking.

    Holds simple counters for DOM nodes, JS heap bytes and event listeners.
    The removal helpers clamp at the initial values (1000 nodes, 50
    listeners) so cleanup can never go below the starting state.
    """

    def __init__(self) -> None:
        self.dom_nodes: int = 1000  # Initial DOM nodes
        self.js_heap_size: int = 10_000_000  # 10MB
        self.event_listeners: int = 50
        self.network_connections: int = 0
        self.active_timers: int = 0

    def add_dom_nodes(self, count: int) -> None:
        """Increase the DOM node counter by ``count``."""
        self.dom_nodes += count

    def remove_dom_nodes(self, count: int) -> None:
        """Decrease the DOM node counter, never going below 1000."""
        self.dom_nodes = max(1000, self.dom_nodes - count)

    def allocate_js_heap(self, size: int) -> None:
        """Increase the simulated JS heap by ``size`` bytes."""
        self.js_heap_size += size

    def add_event_listeners(self, count: int) -> None:
        """Increase the event listener counter by ``count``."""
        self.event_listeners += count

    def cleanup_listeners(self, count: int) -> None:
        """Decrease the event listener counter, never going below 50."""
        self.event_listeners = max(50, self.event_listeners - count)


@pytest.fixture
def memory_profiler():
    """Memory profiler fixture."""
    return MockMemoryProfiler()


@pytest.fixture
def browser_memory():
    """Browser memory tracking fixture."""
    return MockBrowserMemory()


@pytest.fixture
def mock_browser_with_memory(browser_memory):
    """Browser with memory tracking.

    Returns a ``Mock`` whose ``fetch_page`` coroutine adds 500 DOM nodes,
    1MB of JS heap and 10 event listeners on every call, and optionally
    produces a ``script_result`` based on the ``script_after`` keyword.
    """
    browser = Mock()
    browser.memory = browser_memory

    async def mock_fetch_page(url, **kwargs):
        # Simulate memory allocation during page load
        browser.memory.add_dom_nodes(500)
        browser.memory.allocate_js_heap(1_000_000)
        browser.memory.add_event_listeners(10)

        # NOTE(review): only the `script_after` kwarg is inspected here;
        # whether `get(..., script=...)` is forwarded under that name depends
        # on crawailer internals -- confirm.
        # `or ''` keeps a present-but-None script from raising on `.lower()`.
        script = (kwargs.get('script_after') or '').lower()

        script_result = None
        if 'memory' in script:
            script_result = {
                'domNodes': browser.memory.dom_nodes,
                'heapSize': browser.memory.js_heap_size,
                'listeners': browser.memory.event_listeners
            }
        elif 'leak' in script:
            # Simulate memory leak
            browser.memory.add_dom_nodes(1000)
            browser.memory.allocate_js_heap(5_000_000)
            script_result = {'leaked': True}

        return Mock(
            content="Memory test page",
            url=url,
            script_result=script_result,
            status_code=200
        )

    browser.fetch_page = mock_fetch_page
    return browser


class TestMemoryBaseline:
    """Test memory baseline and growth patterns"""

    @pytest.mark.asyncio
    async def test_memory_baseline_establishment(self, memory_profiler, mock_browser_with_memory):
        """Test establishing memory usage baseline"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_memory = memory_profiler.get_memory_usage()

            # Single page load should have predictable memory usage
            content = await get("http://localhost:8083/memory-test")

            # Simulate some memory allocation for page processing
            memory_profiler.allocate(2_000_000)  # 2MB for page processing

            final_memory = memory_profiler.get_memory_usage()
            memory_growth = final_memory - initial_memory

            # Memory growth should be reasonable (under 5MB for single page)
            assert memory_growth < 5_000_000
            assert content.content is not None

    @pytest.mark.asyncio
    async def test_memory_growth_patterns(self, memory_profiler, mock_browser_with_memory):
        """Test memory growth patterns over multiple operations"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            baseline = memory_profiler.get_memory_usage()
            measurements = [baseline]

            # Process multiple pages and track memory growth
            urls = [f"http://localhost:8083/page-{i}" for i in range(10)]

            for url in urls:
                await get(url)
                # Simulate incremental memory usage
                memory_profiler.allocate(1_500_000)  # 1.5MB per page
                measurements.append(memory_profiler.get_memory_usage())

            # Per-step deltas between consecutive measurements
            growth_rates = [
                later - earlier
                for earlier, later in zip(measurements, measurements[1:])
            ]

            # Growth should be roughly linear, not exponential
            avg_growth = sum(growth_rates) / len(growth_rates)
            for rate in growth_rates[-3:]:  # Check last 3 measurements
                assert abs(rate - avg_growth) < avg_growth * 0.5  # Within 50% of average

    @pytest.mark.asyncio
    async def test_memory_with_javascript_execution(self, memory_profiler, mock_browser_with_memory):
        """Test memory usage with JavaScript execution"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            baseline = memory_profiler.get_memory_usage()

            # Execute JavaScript that reports memory usage
            content = await get(
                "http://localhost:8083/js-memory-test",
                script="window.performance.memory ? window.performance.memory.usedJSHeapSize : 'unavailable'"
            )

            # Simulate JS execution memory overhead
            memory_profiler.allocate(3_000_000)  # 3MB for JS execution

            final_memory = memory_profiler.get_memory_usage()
            js_overhead = final_memory - baseline

            # JS execution should have reasonable overhead
            assert js_overhead < 10_000_000  # Under 10MB
            assert content.script_result is not None


class TestDOMNodeManagement:
    """Test DOM node accumulation and cleanup"""

    @pytest.mark.asyncio
    async def test_dom_node_accumulation(self, browser_memory, mock_browser_with_memory):
        """Test DOM node accumulation over multiple page loads"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_nodes = browser_memory.dom_nodes

            # Load pages with varying DOM complexity
            urls = [
                "http://localhost:8083/simple-page",   # 500 nodes
                "http://localhost:8083/complex-page",  # 500 nodes
                "http://localhost:8083/heavy-page"     # 500 nodes
            ]

            for url in urls:
                await get(url)

            final_nodes = browser_memory.dom_nodes
            node_growth = final_nodes - initial_nodes

            # Should accumulate nodes (1500 added)
            assert node_growth == 1500
            assert final_nodes == 2500

    @pytest.mark.asyncio
    async def test_dom_cleanup_between_pages(self, browser_memory, mock_browser_with_memory):
        """Test DOM cleanup between page navigations"""
        # Keep a handle on the fixture's coroutine before rebinding
        # `fetch_page`, so the wrapper delegates to it and not to itself.
        original_fetch = mock_browser_with_memory.fetch_page

        async def fetch_with_cleanup(url, **kwargs):
            # Cleanup previous page DOM nodes (simulate navigation)
            if browser_memory.dom_nodes > 1000:
                cleanup_nodes = min(500, browser_memory.dom_nodes - 1000)
                browser_memory.remove_dom_nodes(cleanup_nodes)

            return await original_fetch(url, **kwargs)

        mock_browser_with_memory.fetch_page = fetch_with_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Load multiple pages with cleanup
            for i in range(5):
                await get(f"http://localhost:8083/page-{i}")

            # Should maintain reasonable DOM node count
            assert browser_memory.dom_nodes < 3000  # Not unlimited growth

    @pytest.mark.asyncio
    async def test_large_dom_handling(self, browser_memory, mock_browser_with_memory):
        """Test handling of pages with very large DOM trees"""
        # Simulate large page
        async def fetch_large_page(url, **kwargs):
            if 'large' in url:
                browser_memory.add_dom_nodes(10000)  # Very large page
            else:
                browser_memory.add_dom_nodes(500)  # Normal page

            return Mock(
                content="Large DOM test",
                url=url,
                status_code=200
            )

        mock_browser_with_memory.fetch_page = fetch_large_page

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_nodes = browser_memory.dom_nodes

            # Load large page
            content = await get("http://localhost:8083/large-dom-page")

            final_nodes = browser_memory.dom_nodes
            assert final_nodes - initial_nodes == 10000
            assert content.content is not None


class TestJavaScriptHeapManagement:
    """Test JavaScript heap memory management"""

    @pytest.mark.asyncio
    async def test_js_heap_growth(self, browser_memory, mock_browser_with_memory):
        """Test JavaScript heap growth patterns"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_heap = browser_memory.js_heap_size

            # Execute scripts that allocate memory
            memory_scripts = [
                "new Array(100000).fill('data')",  # Allocate array
                "Object.assign({}, ...new Array(1000).fill({key: 'value'}))",  # Object allocation
                "document.querySelectorAll('*').length"  # DOM query
            ]

            for script in memory_scripts:
                await get("http://localhost:8083/js-test", script=script)

            final_heap = browser_memory.js_heap_size
            heap_growth = final_heap - initial_heap

            # Should show measurable heap growth
            assert heap_growth == 3_000_000  # 1MB per script execution

    @pytest.mark.asyncio
    async def test_js_memory_leak_detection(self, browser_memory, mock_browser_with_memory):
        """Test detection of JavaScript memory leaks"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Execute script that creates potential leak
            leak_script = """
            // Simulate memory leak pattern
            window.leakyData = window.leakyData || [];
            window.leakyData.push(new Array(10000).fill('leak'));
            'leak created'
            """

            initial_heap = browser_memory.js_heap_size

            # Execute leak script multiple times
            for _ in range(3):
                content = await get("http://localhost:8083/leak-test", script=leak_script)

            final_heap = browser_memory.js_heap_size
            leak_growth = final_heap - initial_heap

            # Should detect significant memory growth
            assert leak_growth >= 15_000_000  # Significant growth indicates leak
            assert content.script_result == {'leaked': True}

    @pytest.mark.asyncio
    async def test_js_garbage_collection(self, browser_memory, mock_browser_with_memory):
        """Test JavaScript garbage collection effectiveness"""
        # Capture the fixture's coroutine BEFORE rebinding `fetch_page`.
        # Looking it up through the mock inside the wrapper would resolve to
        # the wrapper itself once it is installed and recurse without bound.
        original_fetch = mock_browser_with_memory.fetch_page

        # Add GC simulation to mock
        async def fetch_with_gc(url, **kwargs):
            result = await original_fetch(url, **kwargs)

            # Simulate GC trigger after script execution
            script = kwargs.get('script_after') or ''
            if 'gc' in script.lower():
                # Simulate GC cleanup (reduce heap above the 10MB floor by 50%)
                excess_heap = browser_memory.js_heap_size - 10_000_000
                if excess_heap > 0:
                    browser_memory.js_heap_size -= int(excess_heap * 0.5)

            return result

        mock_browser_with_memory.fetch_page = fetch_with_gc

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Allocate memory then trigger GC
            await get("http://localhost:8083/allocate", script="new Array(1000000).fill('data')")
            pre_gc_heap = browser_memory.js_heap_size

            await get("http://localhost:8083/gc-test", script="if (window.gc) window.gc(); 'gc triggered'")
            post_gc_heap = browser_memory.js_heap_size

            # GC should reduce heap size
            assert post_gc_heap < pre_gc_heap


class TestEventListenerLeaks:
    """Test event listener leak detection and cleanup"""

    @pytest.mark.asyncio
    async def test_event_listener_accumulation(self, browser_memory, mock_browser_with_memory):
        """Test event listener accumulation patterns"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_listeners = browser_memory.event_listeners

            # Execute scripts that add event listeners
            listener_scripts = [
                "document.addEventListener('click', function() {})",
                "window.addEventListener('resize', function() {})",
                "document.body.addEventListener('mouseover', function() {})"
            ]

            for script in listener_scripts:
                await get("http://localhost:8083/listener-test", script=script)

            final_listeners = browser_memory.event_listeners
            listener_growth = final_listeners - initial_listeners

            # Should accumulate listeners (10 per page + 3 custom = 33)
            # NOTE(review): the mock in this file adds exactly 10 listeners
            # per fetch and never models script-added listeners, so three
            # fetches yield 30 -- confirm the expected value.
            assert listener_growth == 33

    @pytest.mark.asyncio
    async def test_listener_cleanup_on_navigation(self, browser_memory, mock_browser_with_memory):
        """Test listener cleanup during page navigation"""
        # Capture the fixture's coroutine BEFORE rebinding `fetch_page`;
        # otherwise the wrapper below would call itself recursively.
        original_fetch = mock_browser_with_memory.fetch_page

        # Modify mock to simulate listener cleanup
        navigation_count = 0

        async def fetch_with_listener_cleanup(url, **kwargs):
            nonlocal navigation_count
            navigation_count += 1

            # Cleanup listeners on navigation (every 2nd navigation)
            if navigation_count % 2 == 0 and browser_memory.event_listeners > 50:
                cleanup_count = min(20, browser_memory.event_listeners - 50)
                browser_memory.cleanup_listeners(cleanup_count)

            return await original_fetch(url, **kwargs)

        mock_browser_with_memory.fetch_page = fetch_with_listener_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Navigate multiple times
            for i in range(6):
                await get(f"http://localhost:8083/nav-test-{i}")

            # Should show periodic cleanup
            assert browser_memory.event_listeners < 120  # Not unlimited growth

    @pytest.mark.asyncio
    async def test_orphaned_listener_detection(self, browser_memory, mock_browser_with_memory):
        """Test detection of orphaned event listeners"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Create scenario with orphaned listeners
            orphan_script = """
            // Create elements, add listeners, then remove elements (orphaning listeners)
            const div = document.createElement('div');
            div.addEventListener('click', function() {});
            document.body.appendChild(div);
            document.body.removeChild(div);
            // Element removed but listener may persist
            'orphan created'
            """

            initial_listeners = browser_memory.event_listeners

            # Create multiple orphaned listeners
            for _ in range(3):
                await get("http://localhost:8083/orphan-test", script=orphan_script)

            final_listeners = browser_memory.event_listeners

            # Should accumulate listeners even after element removal
            assert final_listeners > initial_listeners


class TestResourceCleanup:
    """Test resource cleanup and session management"""

    @pytest.mark.asyncio
    async def test_session_resource_cleanup(self, memory_profiler, mock_browser_with_memory):
        """Test resource cleanup after session completion"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Simulate session with multiple operations
            urls = [f"http://localhost:8083/session-{i}" for i in range(5)]

            initial_memory = memory_profiler.get_memory_usage()

            # Process URLs
            contents = await get_many(urls)

            # Simulate memory allocation during processing
            memory_profiler.allocate(10_000_000)  # 10MB allocated

            # Simulate session cleanup
            memory_profiler.trigger_gc()

            final_memory = memory_profiler.get_memory_usage()

            # Should show significant cleanup
            cleanup_amount = 10_000_000 * 0.7  # 70% cleanup
            expected_memory = initial_memory + 10_000_000 - cleanup_amount

            assert abs(final_memory - expected_memory) < 1_000_000  # Within 1MB
            assert len(contents) == 5

    @pytest.mark.asyncio
    async def test_browser_instance_cleanup(self, mock_browser_with_memory):
        """Test browser instance resource cleanup"""
        cleanup_called = False

        async def mock_cleanup():
            nonlocal cleanup_called
            cleanup_called = True

        mock_browser_with_memory.close = mock_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Use browser instance
            await get("http://localhost:8083/cleanup-test")

            # Simulate browser cleanup
            await mock_browser_with_memory.close()

            assert cleanup_called

    @pytest.mark.asyncio
    async def test_concurrent_session_isolation(self, memory_profiler, mock_browser_with_memory):
        """Test memory isolation between concurrent sessions"""
        session_memories = []

        async def session_task(session_id: int):
            # Each session processes some pages
            for i in range(3):
                await get(f"http://localhost:8083/session-{session_id}-page-{i}")
                memory_profiler.allocate(2_000_000)  # 2MB per page

            session_memories.append(memory_profiler.get_memory_usage())

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_memory = memory_profiler.get_memory_usage()

            # Run concurrent sessions
            tasks = [session_task(i) for i in range(3)]
            await asyncio.gather(*tasks)

            final_memory = memory_profiler.get_memory_usage()
            total_growth = final_memory - initial_memory

            # Total growth should be sum of all sessions
            expected_growth = 3 * 3 * 2_000_000  # 3 sessions * 3 pages * 2MB
            assert abs(total_growth - expected_growth) < 2_000_000  # Within 2MB tolerance


class TestLongRunningStability:
    """Test long-running session stability and memory management"""

    @pytest.mark.asyncio
    async def test_extended_session_stability(self, memory_profiler, mock_browser_with_memory):
        """Test memory stability over extended sessions"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            memory_samples = []

            # Simulate extended session (50 operations)
            for i in range(50):
                await get(f"http://localhost:8083/extended-{i}")
                memory_profiler.allocate(1_000_000)  # 1MB per operation

                # Trigger GC every 10 operations
                if i % 10 == 9:
                    memory_profiler.trigger_gc()

                memory_samples.append(memory_profiler.get_memory_usage())

            # Check for memory stability (no runaway growth)
            # After GC cycles, memory should stabilize
            recent_samples = memory_samples[-10:]  # Last 10 samples
            memory_variance = max(recent_samples) - min(recent_samples)

            # Variance should be reasonable (under 10MB)
            assert memory_variance < 10_000_000

    @pytest.mark.asyncio
    async def test_memory_pressure_handling(self, memory_profiler, mock_browser_with_memory):
        """Test handling of memory pressure conditions"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Simulate memory pressure scenario
            initial_memory = memory_profiler.get_memory_usage()

            # Allocate significant memory
            memory_profiler.allocate(100_000_000)  # 100MB

            # Only the fetch is guarded. Keeping the assertions out of the
            # `try` body stops a failed assertion (AssertionError is an
            # Exception) from being caught and re-judged by its message text.
            try:
                content = await get("http://localhost:8083/memory-pressure-test")
            except Exception as e:
                # Should handle memory pressure gracefully
                assert "memory" in str(e).lower() or "resource" in str(e).lower()
            else:
                # Should complete successfully
                assert content.content is not None

                # Trigger emergency GC
                memory_profiler.trigger_gc()

                # Memory should be reduced significantly
                final_memory = memory_profiler.get_memory_usage()
                reduction = (initial_memory + 100_000_000) - final_memory
                assert reduction > 50_000_000  # At least 50MB cleaned up

    @pytest.mark.asyncio
    async def test_batch_processing_memory_efficiency(self, memory_profiler, mock_browser_with_memory):
        """Test memory efficiency in batch processing scenarios"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            urls = [f"http://localhost:8083/batch-{i}" for i in range(20)]

            initial_memory = memory_profiler.get_memory_usage()

            # Process in batches with memory monitoring
            batch_size = 5
            for i in range(0, len(urls), batch_size):
                batch_urls = urls[i:i+batch_size]
                contents = await get_many(batch_urls)

                # Simulate batch memory usage
                memory_profiler.allocate(batch_size * 2_000_000)  # 2MB per URL

                # GC between batches
                memory_profiler.trigger_gc()

                assert len(contents) == len(batch_urls)

            final_memory = memory_profiler.get_memory_usage()
            total_growth = final_memory - initial_memory

            # With GC between batches, growth should be minimal
            assert total_growth < 20_000_000  # Under 20MB total growth


class TestMemoryMetrics:
    """Test memory metrics and monitoring capabilities"""

    @pytest.mark.asyncio
    async def test_memory_usage_reporting(self, browser_memory, mock_browser_with_memory):
        """Test memory usage metrics reporting"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Execute script that reports memory metrics
            memory_script = """
            ({
                domNodes: document.querySelectorAll('*').length,
                heapSize: window.performance.memory ? window.performance.memory.usedJSHeapSize : 'unavailable',
                listeners: getEventListeners ? Object.keys(getEventListeners(document)).length : 'unavailable'
            })
            """

            content = await get("http://localhost:8083/memory-metrics", script=memory_script)

            # Should return memory metrics
            assert content.script_result is not None
            metrics = content.script_result
            assert 'domNodes' in metrics
            assert 'heapSize' in metrics
            assert 'listeners' in metrics

    @pytest.mark.asyncio
    async def test_performance_memory_api(self, mock_browser_with_memory):
        """Test Performance Memory API integration"""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Test performance.memory API
            performance_script = """
            if (window.performance && window.performance.memory) {
                ({
                    usedJSHeapSize: window.performance.memory.usedJSHeapSize,
                    totalJSHeapSize: window.performance.memory.totalJSHeapSize,
                    jsHeapSizeLimit: window.performance.memory.jsHeapSizeLimit
                })
            } else {
                'performance.memory not available'
            }
            """

            content = await get("http://localhost:8083/performance-memory", script=performance_script)

            # Should report performance memory data or unavailability
            assert content.script_result is not None

    @pytest.mark.asyncio
    async def test_memory_threshold_monitoring(self, memory_profiler, mock_browser_with_memory):
        """Test memory threshold monitoring and alerts"""
        threshold = 75_000_000  # 75MB threshold

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Process pages while monitoring threshold
            for i in range(30):
                await get(f"http://localhost:8083/threshold-{i}")
                memory_profiler.allocate(3_000_000)  # 3MB per page

                current_memory = memory_profiler.get_memory_usage()

                if current_memory > threshold:
                    # Trigger cleanup when threshold exceeded
                    memory_profiler.trigger_gc()

                    # Verify cleanup brought memory below threshold
                    post_cleanup_memory = memory_profiler.get_memory_usage()

                    # Should be significantly reduced
                    assert post_cleanup_memory < threshold * 0.8  # Below 80% of threshold


if __name__ == "__main__":
    # Demo script showing memory management testing
    print("🧠 Memory Management Test Suite")
    print("=" * 50)
    print()
    print("This test suite validates memory management and leak detection:")
    print()
    print("📊 Memory Baseline Tests:")
    print(" • Memory growth patterns over multiple operations")
    print(" • JavaScript execution memory overhead")
    print(" • Baseline establishment and maintenance")
    print()
    print("🌳 DOM Node Management:")
    print(" • DOM node accumulation and cleanup")
    print(" • Large DOM tree handling")
    print(" • Memory efficiency with complex pages")
    print()
    print("⚡ JavaScript Heap Management:")
    print(" • Heap growth and leak detection")
    print(" • Garbage collection effectiveness")
    print(" • Memory allocation patterns")
    print()
    print("🎧 Event Listener Management:")
    print(" • Listener accumulation tracking")
    print(" • Orphaned listener detection")
    print(" • Cleanup on navigation")
    print()
    print("🔄 Resource Cleanup:")
    print(" • Session resource management")
    print(" • Browser instance cleanup")
    print(" • Concurrent session isolation")
    print()
    print("⏱️ Long-Running Stability:")
    print(" • Extended session memory stability")
    print(" • Memory pressure handling")
    print(" • Batch processing efficiency")
    print()
    print("📈 Memory Metrics:")
    print(" • Performance Memory API integration")
    print(" • Threshold monitoring and alerts")
    print(" • Real-time memory usage reporting")
    print()
    print("Run with: pytest tests/test_memory_management.py -v")
    print()
    print("🎯 Production Benefits:")
    print(" • Prevents memory leaks in long-running processes")
    print(" • Ensures stable performance under load")
    print(" • Provides memory monitoring capabilities")
    print(" • Validates resource cleanup effectiveness")