
- Complete browser automation with Playwright integration - High-level API functions: get(), get_many(), discover() - JavaScript execution support with script parameters - Content extraction optimized for LLM workflows - Comprehensive test suite with 18 test files (700+ scenarios) - Local Caddy test server for reproducible testing - Performance benchmarking vs Katana crawler - Complete documentation including JavaScript API guide - PyPI-ready packaging with professional metadata - UNIX philosophy: do web scraping exceptionally well
730 lines
30 KiB
Python
730 lines
30 KiB
Python
"""
Memory Management and Leak Detection Tests

Tests for memory usage patterns, leak detection, and resource cleanup
in browser automation scenarios. Critical for production deployments
that need to handle long-running operations without memory bloat.

Test Categories:
- Memory baseline and growth patterns
- DOM node accumulation and cleanup
- JavaScript heap management
- Event listener leak detection
- Resource cleanup validation
- Long-running session stability
- Memory pressure handling
- Garbage collection effectiveness
"""
|
|
|
|
import pytest
|
|
import asyncio
|
|
import gc
|
|
import psutil
|
|
import os
|
|
from unittest.mock import Mock, patch, AsyncMock
|
|
from typing import List, Dict, Any
|
|
|
|
from crawailer import get, get_many, discover
|
|
from crawailer.browser import Browser
|
|
from crawailer.config import BrowserConfig
|
|
|
|
|
|
class MockMemoryProfiler:
    """In-memory stand-in for a process memory profiler.

    Tracks a simulated byte count that starts at a fixed 50MB baseline,
    together with the highest value reached and every allocation size.
    """

    def __init__(self):
        start_bytes = 50_000_000  # 50MB baseline
        self.baseline = start_bytes
        self.current = start_bytes
        self.peak = start_bytes
        self.allocations = []

    def get_memory_usage(self) -> int:
        """Return the simulated memory usage in bytes."""
        return self.current

    def allocate(self, size: int):
        """Grow simulated usage by ``size`` bytes and record the allocation."""
        self.current = self.current + size
        if self.current > self.peak:
            self.peak = self.current
        self.allocations.append(size)

    def deallocate(self, size: int):
        """Shrink simulated usage by ``size`` bytes, never below the baseline."""
        remaining = self.current - size
        self.current = remaining if remaining > self.baseline else self.baseline

    def trigger_gc(self):
        """Simulate a collection that reclaims 70% of usage above the baseline."""
        reclaimable = self.current - self.baseline
        if reclaimable <= 0:
            return
        self.current -= int(reclaimable * 0.7)
|
|
|
|
|
|
class MockBrowserMemory:
    """Counters standing in for a browser's memory-related state.

    Starts with 1000 DOM nodes, a 10MB JS heap and 50 event listeners;
    the removal helpers never drop a counter below its starting value.
    """

    def __init__(self):
        self.dom_nodes = 1000  # Initial DOM nodes
        self.js_heap_size = 10_000_000  # 10MB
        self.event_listeners = 50
        self.network_connections = 0
        self.active_timers = 0

    def add_dom_nodes(self, count: int):
        """Increase the DOM node counter by ``count``."""
        self.dom_nodes = self.dom_nodes + count

    def remove_dom_nodes(self, count: int):
        """Decrease the DOM node counter by ``count``, floored at 1000."""
        remaining = self.dom_nodes - count
        self.dom_nodes = remaining if remaining > 1000 else 1000

    def allocate_js_heap(self, size: int):
        """Increase the JS heap size by ``size`` bytes."""
        self.js_heap_size = self.js_heap_size + size

    def add_event_listeners(self, count: int):
        """Increase the event listener counter by ``count``."""
        self.event_listeners = self.event_listeners + count

    def cleanup_listeners(self, count: int):
        """Decrease the event listener counter by ``count``, floored at 50."""
        remaining = self.event_listeners - count
        self.event_listeners = remaining if remaining > 50 else 50
|
|
|
|
|
|
@pytest.fixture
def memory_profiler():
    """Provide a new MockMemoryProfiler instance."""
    profiler = MockMemoryProfiler()
    return profiler
|
|
|
|
|
|
@pytest.fixture
def browser_memory():
    """Provide a new MockBrowserMemory instance."""
    tracker = MockBrowserMemory()
    return tracker
|
|
|
|
|
|
@pytest.fixture
def mock_browser_with_memory(browser_memory):
    """Mock browser whose ``fetch_page`` updates ``browser_memory`` counters.

    Every simulated page load adds 500 DOM nodes, 1MB of JS heap and 10
    event listeners. If a ``script_after`` keyword is supplied, its text
    selects the ``script_result``:

    - contains ``leak``: an extra 1000 DOM nodes and 5MB of heap are
      added and ``{'leaked': True}`` is returned;
    - otherwise contains ``memory``: the current counters are returned
      as ``domNodes`` / ``heapSize`` / ``listeners``;
    - anything else (or no script): ``script_result`` is ``None``.

    NOTE(review): only ``script_after`` is inspected, while the tests in
    this file pass ``script=`` to ``get()`` -- confirm that ``get()``
    forwards the script under this keyword.
    """
    browser = Mock()
    browser.memory = browser_memory

    async def mock_fetch_page(url, **kwargs):
        memory = browser.memory

        # Simulate memory allocation during page load
        memory.add_dom_nodes(500)
        memory.allocate_js_heap(1_000_000)
        memory.add_event_listeners(10)

        # `.get(...) or ''` tolerates callers that pass script_after=None
        # explicitly; a plain `in kwargs` check would then crash on
        # None.lower().
        script = (kwargs.get('script_after') or '').lower()

        script_result = None
        # 'leak' must be tested before 'memory': a leak script that also
        # mentions "memory" (e.g. "memory leak") would otherwise be
        # treated as a metrics query and never simulate the leak.
        if 'leak' in script:
            # Simulate memory leak
            memory.add_dom_nodes(1000)
            memory.allocate_js_heap(5_000_000)
            script_result = {'leaked': True}
        elif 'memory' in script:
            script_result = {
                'domNodes': memory.dom_nodes,
                'heapSize': memory.js_heap_size,
                'listeners': memory.event_listeners,
            }

        return Mock(
            content="<html><body>Memory test page</body></html>",
            url=url,
            script_result=script_result,
            status_code=200,
        )

    browser.fetch_page = mock_fetch_page
    return browser
|
|
|
|
|
|
class TestMemoryBaseline:
    """Baseline memory footprint and growth-shape checks for page fetches."""

    @pytest.mark.asyncio
    async def test_memory_baseline_establishment(self, memory_profiler, mock_browser_with_memory):
        """A single fetch plus simulated processing grows memory by under 5MB."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            before = memory_profiler.get_memory_usage()

            content = await get("http://localhost:8083/memory-test")

            # Stand-in for the memory that page processing would consume (2MB).
            memory_profiler.allocate(2_000_000)

            growth = memory_profiler.get_memory_usage() - before

            # Memory growth should be reasonable (under 5MB for single page)
            assert growth < 5_000_000
            assert content.content is not None

    @pytest.mark.asyncio
    async def test_memory_growth_patterns(self, memory_profiler, mock_browser_with_memory):
        """Per-page growth over ten fetches stays close to its average."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # First sample is the baseline; one more is taken after each page.
            measurements = [memory_profiler.get_memory_usage()]

            for page_index in range(10):
                await get(f"http://localhost:8083/page-{page_index}")
                # Simulated incremental usage: 1.5MB per page
                memory_profiler.allocate(1_500_000)
                measurements.append(memory_profiler.get_memory_usage())

            # Step-to-step deltas between consecutive samples.
            growth_rates = [
                later - earlier
                for earlier, later in zip(measurements, measurements[1:])
            ]

            # Linear growth means the latest deltas match the overall average;
            # each of the last three must be within 50% of it.
            avg_growth = sum(growth_rates) / len(growth_rates)
            for rate in growth_rates[-3:]:
                assert abs(rate - avg_growth) < avg_growth * 0.5

    @pytest.mark.asyncio
    async def test_memory_with_javascript_execution(self, memory_profiler, mock_browser_with_memory):
        """A fetch that runs a heap-reporting script stays under 10MB overhead."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            before = memory_profiler.get_memory_usage()

            # Script asks the page for its used JS heap size, if exposed.
            heap_query = "window.performance.memory ? window.performance.memory.usedJSHeapSize : 'unavailable'"
            content = await get(
                "http://localhost:8083/js-memory-test",
                script=heap_query
            )

            # Stand-in for the JS execution overhead (3MB).
            memory_profiler.allocate(3_000_000)

            js_overhead = memory_profiler.get_memory_usage() - before

            assert js_overhead < 10_000_000  # Under 10MB
            assert content.script_result is not None
|
|
|
|
|
|
class TestDOMNodeManagement:
    """DOM node counter behaviour across page loads, with and without cleanup."""

    @pytest.mark.asyncio
    async def test_dom_node_accumulation(self, browser_memory, mock_browser_with_memory):
        """Three page loads at 500 nodes each add 1500 nodes in total."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            nodes_before = browser_memory.dom_nodes

            # The mock adds the same 500 nodes regardless of the URL.
            for url in (
                "http://localhost:8083/simple-page",   # 500 nodes
                "http://localhost:8083/complex-page",  # 500 nodes
                "http://localhost:8083/heavy-page",    # 500 nodes
            ):
                await get(url)

            nodes_after = browser_memory.dom_nodes

            # Should accumulate nodes (1500 added)
            assert nodes_after - nodes_before == 1500
            assert nodes_after == 2500

    @pytest.mark.asyncio
    async def test_dom_cleanup_between_pages(self, browser_memory, mock_browser_with_memory):
        """Dropping the previous page's nodes before each load bounds the count."""
        # Keep a handle on the unwrapped fetch so the wrapper can delegate to it.
        original_fetch = mock_browser_with_memory.fetch_page

        async def fetch_with_cleanup(url, **kwargs):
            # Simulated navigation: release up to 500 nodes above the
            # 1000-node starting point before loading the next page.
            surplus = browser_memory.dom_nodes - 1000
            if surplus > 0:
                browser_memory.remove_dom_nodes(min(500, surplus))

            return await original_fetch(url, **kwargs)

        mock_browser_with_memory.fetch_page = fetch_with_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            for page_index in range(5):
                await get(f"http://localhost:8083/page-{page_index}")

            # Should maintain reasonable DOM node count
            assert browser_memory.dom_nodes < 3000  # Not unlimited growth

    @pytest.mark.asyncio
    async def test_large_dom_handling(self, browser_memory, mock_browser_with_memory):
        """A URL containing 'large' adds 10000 nodes and still yields content."""

        async def fetch_large_page(url, **kwargs):
            # 10000 nodes for a "large" URL, 500 for any other page.
            node_count = 10000 if 'large' in url else 500
            browser_memory.add_dom_nodes(node_count)

            return Mock(
                content="<html><body>Large DOM test</body></html>",
                url=url,
                status_code=200
            )

        mock_browser_with_memory.fetch_page = fetch_large_page

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            nodes_before = browser_memory.dom_nodes

            content = await get("http://localhost:8083/large-dom-page")

            nodes_after = browser_memory.dom_nodes

            assert nodes_after - nodes_before == 10000
            assert content.content is not None
|
|
|
|
|
|
class TestJavaScriptHeapManagement:
    """Test JavaScript heap memory management"""

    @pytest.mark.asyncio
    async def test_js_heap_growth(self, browser_memory, mock_browser_with_memory):
        """Three script-carrying fetches grow the mock heap by 1MB each."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_heap = browser_memory.js_heap_size

            # Scripts that would allocate memory in a real page. None of
            # them contains the 'memory' or 'leak' keywords the mock reacts to.
            memory_scripts = [
                "new Array(100000).fill('data')",  # Allocate array
                "Object.assign({}, ...new Array(1000).fill({key: 'value'}))",  # Object allocation
                "document.querySelectorAll('*').length",  # DOM query
            ]

            for script in memory_scripts:
                await get("http://localhost:8083/js-test", script=script)

            heap_growth = browser_memory.js_heap_size - initial_heap

            # Should show measurable heap growth
            assert heap_growth == 3_000_000  # 1MB per script execution

    @pytest.mark.asyncio
    async def test_js_memory_leak_detection(self, browser_memory, mock_browser_with_memory):
        """Repeatedly running a leaking script shows up as large heap growth."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # NOTE(review): this script text contains both 'memory' and
            # 'leak'; the mock's keyword matching decides which simulated
            # branch runs for it.
            leak_script = """
                // Simulate memory leak pattern
                window.leakyData = window.leakyData || [];
                window.leakyData.push(new Array(10000).fill('leak'));
                'leak created'
            """

            initial_heap = browser_memory.js_heap_size

            # Execute leak script multiple times
            for _ in range(3):
                content = await get("http://localhost:8083/leak-test", script=leak_script)

            leak_growth = browser_memory.js_heap_size - initial_heap

            # Should detect significant memory growth
            assert leak_growth >= 15_000_000  # Significant growth indicates leak
            assert content.script_result == {'leaked': True}

    @pytest.mark.asyncio
    async def test_js_garbage_collection(self, browser_memory, mock_browser_with_memory):
        """A GC-triggering script leaves the heap smaller than without GC."""
        # Capture the unwrapped fetch BEFORE rebinding the attribute; looking
        # it up inside the wrapper would resolve to the wrapper itself and
        # recurse forever.
        original_fetch = mock_browser_with_memory.fetch_page
        heap_floor = 10_000_000  # initial heap size of the mock
        page_load_heap_cost = 1_000_000  # heap the mock adds on every fetch

        async def fetch_with_gc(url, **kwargs):
            result = await original_fetch(url, **kwargs)

            # `or ''` tolerates script_after being absent or None.
            script = kwargs.get('script_after') or ''
            if 'gc' in script.lower():
                # Simulate GC cleanup (reduce heap above the floor by 50%)
                excess_heap = browser_memory.js_heap_size - heap_floor
                if excess_heap > 0:
                    browser_memory.js_heap_size -= int(excess_heap * 0.5)

            return result

        mock_browser_with_memory.fetch_page = fetch_with_gc

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Allocate memory then trigger GC
            await get("http://localhost:8083/allocate", script="new Array(1000000).fill('data')")
            pre_gc_heap = browser_memory.js_heap_size

            await get("http://localhost:8083/gc-test", script="if (window.gc) window.gc(); 'gc triggered'")
            post_gc_heap = browser_memory.js_heap_size

            # The gc-test page load itself adds heap before the collection
            # runs, so compare against what the heap would have been had
            # no collection happened rather than against pre_gc_heap.
            uncollected_heap = pre_gc_heap + page_load_heap_cost
            assert post_gc_heap < uncollected_heap
|
|
|
|
|
|
class TestEventListenerLeaks:
    """Test event listener leak detection and cleanup"""

    @pytest.mark.asyncio
    async def test_event_listener_accumulation(self, browser_memory, mock_browser_with_memory):
        """Three page loads add 10 mock listeners each."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_listeners = browser_memory.event_listeners

            # Scripts that would add event listeners in a real page
            listener_scripts = [
                "document.addEventListener('click', function() {})",
                "window.addEventListener('resize', function() {})",
                "document.body.addEventListener('mouseover', function() {})",
            ]

            for script in listener_scripts:
                await get("http://localhost:8083/listener-test", script=script)

            listener_growth = browser_memory.event_listeners - initial_listeners

            # The mock adds 10 listeners per page load and does not
            # interpret addEventListener calls in the script text, so the
            # growth is exactly 10 per fetch.
            assert listener_growth == 10 * len(listener_scripts)

    @pytest.mark.asyncio
    async def test_listener_cleanup_on_navigation(self, browser_memory, mock_browser_with_memory):
        """Periodic cleanup keeps listeners below the no-cleanup total."""
        # Capture the unwrapped fetch BEFORE rebinding the attribute; looking
        # it up inside the wrapper would resolve to the wrapper itself and
        # recurse forever.
        original_fetch = mock_browser_with_memory.fetch_page
        navigation_count = 0

        async def fetch_with_listener_cleanup(url, **kwargs):
            nonlocal navigation_count
            navigation_count += 1

            # Cleanup listeners on navigation (every 2nd navigation),
            # removing at most 20 and never going below the initial 50.
            if navigation_count % 2 == 0 and browser_memory.event_listeners > 50:
                cleanup_count = min(20, browser_memory.event_listeners - 50)
                browser_memory.cleanup_listeners(cleanup_count)

            return await original_fetch(url, **kwargs)

        mock_browser_with_memory.fetch_page = fetch_with_listener_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            initial_listeners = browser_memory.event_listeners
            navigations = 6

            # Navigate multiple times
            for i in range(navigations):
                await get(f"http://localhost:8083/nav-test-{i}")

            # Without any cleanup the mock would reach initial + 10 per
            # navigation; the count must end strictly below that, otherwise
            # the periodic cleanup had no effect.
            uncleaned_total = initial_listeners + navigations * 10
            assert browser_memory.event_listeners < uncleaned_total

    @pytest.mark.asyncio
    async def test_orphaned_listener_detection(self, browser_memory, mock_browser_with_memory):
        """Listener count still grows when the script removes its elements."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Create scenario with orphaned listeners
            orphan_script = """
                // Create elements, add listeners, then remove elements (orphaning listeners)
                const div = document.createElement('div');
                div.addEventListener('click', function() {});
                document.body.appendChild(div);
                document.body.removeChild(div); // Element removed but listener may persist
                'orphan created'
            """

            initial_listeners = browser_memory.event_listeners

            # Create multiple orphaned listeners
            for _ in range(3):
                await get("http://localhost:8083/orphan-test", script=orphan_script)

            final_listeners = browser_memory.event_listeners

            # Should accumulate listeners even after element removal
            assert final_listeners > initial_listeners
|
|
|
|
|
|
class TestResourceCleanup:
    """Session-level cleanup, browser close, and concurrent-session accounting."""

    @pytest.mark.asyncio
    async def test_session_resource_cleanup(self, memory_profiler, mock_browser_with_memory):
        """After a 10MB session and a simulated GC, 70% of the growth is gone."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            session_urls = [f"http://localhost:8083/session-{n}" for n in range(5)]
            allocated = 10_000_000  # 10MB allocated during processing

            before = memory_profiler.get_memory_usage()

            contents = await get_many(session_urls)

            # Simulated processing cost followed by the session cleanup.
            memory_profiler.allocate(allocated)
            memory_profiler.trigger_gc()

            after = memory_profiler.get_memory_usage()

            # The profiler's GC reclaims 70% of what sits above the baseline.
            expected_memory = before + allocated - allocated * 0.7

            assert abs(after - expected_memory) < 1_000_000  # Within 1MB
            assert len(contents) == 5

    @pytest.mark.asyncio
    async def test_browser_instance_cleanup(self, mock_browser_with_memory):
        """The replacement ``close`` coroutine is observed to have run."""
        close_calls = []

        async def mock_cleanup():
            close_calls.append(True)

        mock_browser_with_memory.close = mock_cleanup

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Use browser instance
            await get("http://localhost:8083/cleanup-test")

            # The test itself invokes close() to simulate browser cleanup.
            await mock_browser_with_memory.close()

            assert close_calls

    @pytest.mark.asyncio
    async def test_concurrent_session_isolation(self, memory_profiler, mock_browser_with_memory):
        """Three concurrent 3-page sessions add up to the expected total growth."""
        session_memories = []

        async def session_task(session_id: int):
            # Each session fetches three pages, charging 2MB per page to
            # the shared profiler, then records the usage it observes.
            for page in range(3):
                await get(f"http://localhost:8083/session-{session_id}-page-{page}")
                memory_profiler.allocate(2_000_000)

            session_memories.append(memory_profiler.get_memory_usage())

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            before = memory_profiler.get_memory_usage()

            # Run concurrent sessions
            await asyncio.gather(*[session_task(sid) for sid in range(3)])

            total_growth = memory_profiler.get_memory_usage() - before

            # Total growth should be sum of all sessions
            expected_growth = 3 * 3 * 2_000_000  # 3 sessions * 3 pages * 2MB
            assert abs(total_growth - expected_growth) < 2_000_000  # Within 2MB tolerance
|
|
|
|
|
|
class TestLongRunningStability:
    """Test long-running session stability and memory management"""

    @pytest.mark.asyncio
    async def test_extended_session_stability(self, memory_profiler, mock_browser_with_memory):
        """Usage over the last ten of fifty operations varies by under 10MB."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            memory_samples = []

            # Simulate extended session (50 operations)
            for i in range(50):
                await get(f"http://localhost:8083/extended-{i}")
                memory_profiler.allocate(1_000_000)  # 1MB per operation

                # Trigger GC every 10 operations
                if i % 10 == 9:
                    memory_profiler.trigger_gc()

                # Sampled after any GC, so every tenth sample is post-collection.
                memory_samples.append(memory_profiler.get_memory_usage())

            # Check for memory stability (no runaway growth):
            # after GC cycles, memory should stabilize.
            recent_samples = memory_samples[-10:]  # Last 10 samples
            memory_variance = max(recent_samples) - min(recent_samples)

            # Variance should be reasonable (under 10MB)
            assert memory_variance < 10_000_000

    @pytest.mark.asyncio
    async def test_memory_pressure_handling(self, memory_profiler, mock_browser_with_memory):
        """A fetch under 100MB of simulated pressure succeeds or fails cleanly."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Simulate memory pressure scenario
            initial_memory = memory_profiler.get_memory_usage()

            # Allocate significant memory
            memory_profiler.allocate(100_000_000)  # 100MB

            # Only the fetch is guarded. Keeping the assertions outside the
            # try block stops `except Exception` from swallowing an
            # AssertionError and re-judging it by its message text.
            try:
                content = await get("http://localhost:8083/memory-pressure-test")
            except Exception as exc:
                # A failure is acceptable only when it is recognisably a
                # memory/resource error.
                message = str(exc).lower()
                assert "memory" in message or "resource" in message, (
                    f"unexpected failure under memory pressure: {exc!r}"
                )
                return

            # Should complete successfully
            assert content.content is not None

            # Trigger emergency GC
            memory_profiler.trigger_gc()

            # Memory should be reduced significantly
            final_memory = memory_profiler.get_memory_usage()
            reduction = (initial_memory + 100_000_000) - final_memory
            assert reduction > 50_000_000  # At least 50MB cleaned up

    @pytest.mark.asyncio
    async def test_batch_processing_memory_efficiency(self, memory_profiler, mock_browser_with_memory):
        """GC between batches of five keeps growth over twenty URLs under 20MB."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            urls = [f"http://localhost:8083/batch-{i}" for i in range(20)]

            initial_memory = memory_profiler.get_memory_usage()

            # Process in batches with memory monitoring
            batch_size = 5
            for start in range(0, len(urls), batch_size):
                batch_urls = urls[start:start + batch_size]
                contents = await get_many(batch_urls)

                # Simulate batch memory usage
                memory_profiler.allocate(batch_size * 2_000_000)  # 2MB per URL

                # GC between batches
                memory_profiler.trigger_gc()

                assert len(contents) == len(batch_urls)

            final_memory = memory_profiler.get_memory_usage()
            total_growth = final_memory - initial_memory

            # With GC between batches, growth should be minimal
            assert total_growth < 20_000_000  # Under 20MB total growth
|
|
|
|
|
|
class TestMemoryMetrics:
    """Test memory metrics and monitoring capabilities"""

    @pytest.mark.asyncio
    async def test_memory_usage_reporting(self, browser_memory, mock_browser_with_memory):
        """A metrics script yields DOM, heap and listener figures."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Execute script that reports memory metrics.
            # `getEventListeners` is a DevTools console utility, not a page
            # global, so it is probed with `typeof`; reading the bare
            # identifier would throw a ReferenceError in page context.
            memory_script = """
                ({
                    domNodes: document.querySelectorAll('*').length,
                    heapSize: window.performance.memory ? window.performance.memory.usedJSHeapSize : 'unavailable',
                    listeners: typeof getEventListeners === 'function' ? Object.keys(getEventListeners(document)).length : 'unavailable'
                })
            """

            content = await get("http://localhost:8083/memory-metrics", script=memory_script)

            # Should return memory metrics
            assert content.script_result is not None
            metrics = content.script_result
            assert 'domNodes' in metrics
            assert 'heapSize' in metrics
            assert 'listeners' in metrics

    @pytest.mark.asyncio
    async def test_performance_memory_api(self, mock_browser_with_memory):
        """A performance.memory probe script produces some script result."""
        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Test performance.memory API
            performance_script = """
                if (window.performance && window.performance.memory) {
                    ({
                        usedJSHeapSize: window.performance.memory.usedJSHeapSize,
                        totalJSHeapSize: window.performance.memory.totalJSHeapSize,
                        jsHeapSizeLimit: window.performance.memory.jsHeapSizeLimit
                    })
                } else {
                    'performance.memory not available'
                }
            """

            content = await get("http://localhost:8083/performance-memory", script=performance_script)

            # Should report performance memory data or unavailability
            assert content.script_result is not None

    @pytest.mark.asyncio
    async def test_memory_threshold_monitoring(self, memory_profiler, mock_browser_with_memory):
        """Crossing the 75MB threshold triggers a GC that drops well below it."""
        threshold = 75_000_000  # 75MB threshold
        cleanups_triggered = 0

        with patch('crawailer.browser.Browser', return_value=mock_browser_with_memory):
            # Process pages while monitoring threshold
            for i in range(30):
                await get(f"http://localhost:8083/threshold-{i}")
                memory_profiler.allocate(3_000_000)  # 3MB per page

                current_memory = memory_profiler.get_memory_usage()
                if current_memory > threshold:
                    # Trigger cleanup when threshold exceeded
                    memory_profiler.trigger_gc()
                    cleanups_triggered += 1

                    # Verify cleanup brought memory below threshold
                    post_cleanup_memory = memory_profiler.get_memory_usage()
                    # Should be significantly reduced
                    assert post_cleanup_memory < threshold * 0.8  # Below 80% of threshold

        # Guard against a vacuous pass: the scenario must actually cross the
        # threshold at least once for the cleanup assertion to have run.
        assert cleanups_triggered > 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo banner describing what this test module covers. Printed only
    # when the file is run directly; an empty entry is a blank line.
    banner_lines = (
        "🧠 Memory Management Test Suite",
        "=" * 50,
        "",
        "This test suite validates memory management and leak detection:",
        "",
        "📊 Memory Baseline Tests:",
        " • Memory growth patterns over multiple operations",
        " • JavaScript execution memory overhead",
        " • Baseline establishment and maintenance",
        "",
        "🌳 DOM Node Management:",
        " • DOM node accumulation and cleanup",
        " • Large DOM tree handling",
        " • Memory efficiency with complex pages",
        "",
        "⚡ JavaScript Heap Management:",
        " • Heap growth and leak detection",
        " • Garbage collection effectiveness",
        " • Memory allocation patterns",
        "",
        "🎧 Event Listener Management:",
        " • Listener accumulation tracking",
        " • Orphaned listener detection",
        " • Cleanup on navigation",
        "",
        "🔄 Resource Cleanup:",
        " • Session resource management",
        " • Browser instance cleanup",
        " • Concurrent session isolation",
        "",
        "⏱️ Long-Running Stability:",
        " • Extended session memory stability",
        " • Memory pressure handling",
        " • Batch processing efficiency",
        "",
        "📈 Memory Metrics:",
        " • Performance Memory API integration",
        " • Threshold monitoring and alerts",
        " • Real-time memory usage reporting",
        "",
        "Run with: pytest tests/test_memory_management.py -v",
        "",
        "🎯 Production Benefits:",
        " • Prevents memory leaks in long-running processes",
        " • Ensures stable performance under load",
        " • Provides memory monitoring capabilities",
        " • Validates resource cleanup effectiveness",
    )
    for banner_line in banner_lines:
        print(banner_line)