""" Comprehensive edge case and error scenario testing for Crawailer JavaScript API. This test suite focuses on boundary conditions, malformed inputs, error handling, and unusual scenarios that could break the JavaScript execution functionality. """ import asyncio import json import pytest import time import os import tempfile from pathlib import Path from typing import Dict, Any, List, Optional from unittest.mock import AsyncMock, MagicMock, patch from concurrent.futures import ThreadPoolExecutor from crawailer import Browser, BrowserConfig from crawailer.content import WebContent, ContentExtractor from crawailer.api import get, get_many, discover from crawailer.utils import clean_text class TestMalformedJavaScriptCodes: """Test handling of malformed, invalid, or dangerous JavaScript code.""" @pytest.mark.asyncio async def test_syntax_error_javascript(self): """Test handling of JavaScript with syntax errors.""" browser = Browser(BrowserConfig()) # Mock browser setup mock_page = AsyncMock() mock_page.evaluate.side_effect = Exception("SyntaxError: Unexpected token '{'") mock_page.goto = AsyncMock() mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True # Test various syntax errors invalid_scripts = [ "function() { return 'missing name'; }", # Missing function name in declaration "if (true { console.log('missing paren'); }", # Missing closing parenthesis "var x = 'unclosed string;", # Unclosed string "function test() { return; extra_token }", # Extra token after return "{ invalid: json, syntax }", # Invalid object syntax "for (let i = 0; i < 10 i++) { }", # Missing semicolon "document.querySelector('div').map(x => x.text)", # Calling array method on NodeList ] for script in invalid_scripts: with pytest.raises(Exception) as exc_info: await browser.execute_script("https://example.com", script) # Should contain some form of syntax error information error_msg = str(exc_info.value).lower() assert any(keyword in error_msg for keyword in ["syntax", "unexpected", "error"]) @pytest.mark.asyncio async def test_infinite_loop_javascript(self): """Test handling of JavaScript with infinite loops.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() # Simulate timeout due to infinite loop mock_page.evaluate.side_effect = asyncio.TimeoutError("Script execution timeout") mock_page.goto = AsyncMock() mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True # Scripts that could cause infinite loops infinite_scripts = [ "while(true) { console.log('infinite'); }", "for(;;) { var x = 1; }", "function recurse() { recurse(); } recurse();", "let x = 0; while(x >= 0) { x++; }", ] for script in infinite_scripts: with pytest.raises(asyncio.TimeoutError): await browser.execute_script("https://example.com", script, timeout=1000) @pytest.mark.asyncio async def test_memory_exhaustion_javascript(self): """Test handling of JavaScript that attempts to exhaust memory.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() # Simulate out of memory error mock_page.evaluate.side_effect = Exception("RangeError: Maximum call stack size exceeded") mock_page.goto = AsyncMock() mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True # Scripts that could exhaust memory memory_exhausting_scripts = [ "var arr = []; while(true) { arr.push(new Array(1000000)); }", "var str = 'x'; while(true) { str += str; }", "var obj = {}; for(let i = 0; i < 1000000; i++) { obj[i] = new Array(1000); }", ] for script in memory_exhausting_scripts: with pytest.raises(Exception) as exc_info: await browser.execute_script("https://example.com", script) error_msg = str(exc_info.value).lower() assert any(keyword in error_msg for keyword in ["memory", "stack", "range", "error"]) @pytest.mark.asyncio async def test_unicode_and_special_characters(self): """Test JavaScript execution with Unicode and special characters.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True # Test various Unicode and special character scenarios unicode_scripts = [ "return '测试中文字符'", # Chinese characters "return 'emoji test 🚀🔥⭐'", # Emoji "return 'áéíóú ñ ü'", # Accented characters "return 'null\\x00char'", # Null character "return 'quote\\\"escape\\\"test'", # Escaped quotes "return `template\\nliteral\\twith\\ttabs`", # Template literal with escapes "return JSON.stringify({key: '测试', emoji: '🔥'})", # Unicode in JSON ] for i, script in enumerate(unicode_scripts): # Mock different return values for each test expected_results = [ "测试中文字符", "emoji test 🚀🔥⭐", "áéíóú ñ ü", "null\x00char", 'quote"escape"test', "template\nliteral\twith\ttabs", '{"key":"测试","emoji":"🔥"}' ] mock_page.evaluate.return_value = expected_results[i] result = await browser.execute_script("https://example.com", script) assert result == expected_results[i] @pytest.mark.asyncio async def test_extremely_large_javascript_results(self): """Test handling of JavaScript that returns extremely large data.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() # Simulate large result (1MB string) large_result = "x" * (1024 * 1024) mock_page.evaluate.return_value = large_result mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True result = await browser.execute_script( "https://example.com", "return 'x'.repeat(1024 * 1024)" ) assert len(result) == 1024 * 1024 assert result == large_result @pytest.mark.asyncio async def test_circular_reference_javascript(self): """Test JavaScript that returns circular references.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() # Mock error for circular reference mock_page.evaluate.side_effect = Exception("Converting circular structure to JSON") mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True circular_script = """ var obj = {}; obj.self = obj; return obj; """ with pytest.raises(Exception) as exc_info: await browser.execute_script("https://example.com", circular_script) assert "circular" in str(exc_info.value).lower() class TestNetworkFailureScenarios: """Test JavaScript execution during various network failure conditions.""" @pytest.mark.asyncio async def test_network_timeout_during_page_load(self): """Test script execution when page load times out.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto.side_effect = asyncio.TimeoutError("Navigation timeout") mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(asyncio.TimeoutError): await browser.execute_script( "https://very-slow-site.com", "return document.title", timeout=1000 ) @pytest.mark.asyncio async def test_dns_resolution_failure(self): """Test handling of DNS resolution failures.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto.side_effect = Exception("net::ERR_NAME_NOT_RESOLVED") mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(Exception) as exc_info: await browser.execute_script( "https://nonexistent-domain-12345.invalid", "return true" ) assert "name_not_resolved" in str(exc_info.value).lower() @pytest.mark.asyncio async def test_connection_refused(self): """Test handling of connection refused errors.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto.side_effect = Exception("net::ERR_CONNECTION_REFUSED") mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(Exception) as exc_info: await browser.execute_script( "http://localhost:99999", # Unlikely to be open "return document.body.innerHTML" ) assert "connection" in str(exc_info.value).lower() @pytest.mark.asyncio async def test_ssl_certificate_error(self): """Test handling of SSL certificate errors.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto.side_effect = Exception("net::ERR_CERT_AUTHORITY_INVALID") mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(Exception) as exc_info: await browser.execute_script( "https://self-signed.badssl.com/", "return location.hostname" ) error_msg = str(exc_info.value).lower() assert any(keyword in error_msg for keyword in ["cert", "ssl", "authority"]) @pytest.mark.asyncio async def test_network_interruption_during_script(self): """Test network interruption while script is executing.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() # Simulate network interruption during script execution mock_page.evaluate.side_effect = Exception("net::ERR_NETWORK_CHANGED") mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(Exception) as exc_info: await browser.execute_script( "https://example.com", "await fetch('/api/data'); return 'success'" ) assert "network" in str(exc_info.value).lower() class TestConcurrencyAndResourceLimits: """Test concurrent execution and resource management.""" @pytest.mark.asyncio async def test_concurrent_script_execution_limits(self): """Test behavior at concurrency limits.""" browser = Browser(BrowserConfig()) # Mock setup for multiple concurrent requests mock_pages = [] for i in range(20): # Create 20 mock pages mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.evaluate.return_value = f"result_{i}" mock_page.close = AsyncMock() mock_pages.append(mock_page) mock_browser = AsyncMock() mock_browser.new_page.side_effect = mock_pages browser._browser = mock_browser browser._is_started = True # Launch many concurrent script executions tasks = [] for i in range(20): task = browser.execute_script( f"https://example.com/page{i}", f"return 'result_{i}'" ) tasks.append(task) # Should handle all concurrent requests results = await asyncio.gather(*tasks, return_exceptions=True) # Count successful results vs exceptions successful = [r for r in results if not isinstance(r, Exception)] errors = [r for r in results if isinstance(r, Exception)] # Most should succeed, but some might fail due to resource limits assert len(successful) >= 10 # At least half should succeed assert len(errors) <= 10 # Not all should fail @pytest.mark.asyncio async def test_browser_crash_recovery(self): """Test recovery when browser process crashes.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() # First call succeeds mock_page.evaluate.return_value = "success" mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True # First execution succeeds result1 = await browser.execute_script("https://example.com", "return 'success'") assert result1 == "success" # Simulate browser crash on second call mock_page.evaluate.side_effect = Exception("Browser process crashed") with pytest.raises(Exception) as exc_info: await browser.execute_script("https://example.com", "return 'test'") assert "crashed" in str(exc_info.value).lower() @pytest.mark.asyncio async def test_memory_leak_prevention(self): """Test that pages are properly cleaned up to prevent memory leaks.""" browser = Browser(BrowserConfig()) created_pages = [] def create_mock_page(): mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.evaluate.return_value = "success" mock_page.close = AsyncMock() created_pages.append(mock_page) return mock_page mock_browser = AsyncMock() mock_browser.new_page.side_effect = create_mock_page browser._browser = mock_browser browser._is_started = True # Execute multiple scripts for i in range(10): await browser.execute_script(f"https://example.com/page{i}", "return 'test'") # Verify all pages were closed assert len(created_pages) == 10 for page in created_pages: page.close.assert_called_once() @pytest.mark.asyncio async def test_page_resource_exhaustion(self): """Test handling when page resources are exhausted.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.goto = AsyncMock() mock_page.close = AsyncMock() # Simulate resource exhaustion mock_page.evaluate.side_effect = Exception("Target closed") mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True with pytest.raises(Exception) as exc_info: await browser.execute_script("https://example.com", "return 'test'") assert "closed" in str(exc_info.value).lower() class TestInvalidParameterCombinations: """Test various invalid parameter combinations and edge cases.""" @pytest.mark.asyncio async def test_invalid_urls(self): """Test handling of various invalid URL formats.""" browser = Browser(BrowserConfig()) mock_page = AsyncMock() mock_page.close = AsyncMock() mock_browser = AsyncMock() mock_browser.new_page.return_value = mock_page browser._browser = mock_browser browser._is_started = True invalid_urls = [ "", # Empty string "not-a-url", # Not a URL "ftp://example.com", # Unsupported protocol "javascript:alert('test')", # JavaScript URL "data:text/html,