crawailer/minimal_failing_test.py

#!/usr/bin/env python3
"""
Minimal test that will actually fail against current implementation.
This demonstrates our test-driven development approach works.
"""

import sys
import traceback

def test_webcontent_script_fields():
    """Test that will fail because WebContent doesn't have script fields."""
    print("🧪 Testing WebContent script_result field...")

    try:
        # This should fail because script_result isn't implemented
        from dataclasses import dataclass
        from typing import Optional, Any
        from datetime import datetime

        @dataclass
        class TestWebContent:
            """Simulated current WebContent structure."""
            url: str
            title: str
            text: str
            markdown: str
            html: str
            # Missing: script_result and script_error fields

        # This will succeed
        content = TestWebContent(
            url="https://example.com",
            title="Test",
            text="content",
            markdown="# Test",
            html="<html></html>"
        )
        print("✅ Basic WebContent creation works")

        # This will fail - no script_result attribute
        try:
            result = content.script_result  # Should fail!
            print(f"❌ UNEXPECTED: script_result exists: {result}")
            return False
        except AttributeError:
            print("✅ EXPECTED FAILURE: script_result field missing")
            return True

    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        traceback.print_exc()
        return False

def test_enhanced_api_signature():
    """Test that will fail because API doesn't accept script parameters."""
    print("\n🧪 Testing enhanced get() signature...")

    try:
        def current_get(url, *, wait_for=None, timeout=30, clean=True,
                       extract_links=True, extract_metadata=True):
            """Current get() function signature."""
            return {"url": url, "params": locals()}

        # This should work (current API)
        result = current_get("https://example.com")
        print("✅ Current API signature works")

        # This should fail (enhanced API)
        try:
            result = current_get(
                "https://example.com",
                script="document.title"  # Should fail!
            )
            print(f"❌ UNEXPECTED: script parameter accepted: {result}")
            return False
        except TypeError as e:
            print(f"✅ EXPECTED FAILURE: script parameter rejected: {e}")
            return True

    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False

def test_mock_comprehensive_scenario():
    """Test a realistic scenario that should fail."""
    print("\n🧪 Testing comprehensive JavaScript scenario...")

    try:
        # Simulate trying to use our enhanced API
        def mock_enhanced_get(url, **kwargs):
            """Mock enhanced get that should reject script params."""
            allowed_params = {'wait_for', 'timeout', 'clean', 'extract_links', 'extract_metadata'}
            script_params = {'script', 'script_before', 'script_after'}

            provided_script_params = set(kwargs.keys()) & script_params
            if provided_script_params:
                raise TypeError(f"Unexpected keyword arguments: {provided_script_params}")

            return {"url": url, "success": True}

        # This should work
        result = mock_enhanced_get("https://example.com", wait_for=".content")
        print("✅ Basic usage works")

        # This should fail
        try:
            result = mock_enhanced_get(
                "https://shop.com/product",
                script="document.querySelector('.price').innerText",
                wait_for=".price-loaded"
            )
            print(f"❌ UNEXPECTED: JavaScript parameters accepted: {result}")
            return False
        except TypeError as e:
            print(f"✅ EXPECTED FAILURE: JavaScript parameters rejected: {e}")
            return True

    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False

def test_batch_scenario():
    """Test batch processing scenario that should fail."""
    print("\n🧪 Testing batch JavaScript scenario...")

    try:
        def mock_get_many(urls, **kwargs):
            """Mock get_many that should reject script param."""
            if 'script' in kwargs:
                raise TypeError("get_many() got an unexpected keyword argument 'script'")
            return [{"url": url, "success": True} for url in urls]

        # This should work
        urls = ["https://site1.com", "https://site2.com"]
        result = mock_get_many(urls, max_concurrent=2)
        print(f"✅ Basic batch processing works: {len(result)} results")

        # This should fail
        try:
            scripts = ["script1", "script2"]
            result = mock_get_many(urls, script=scripts)
            print(f"❌ UNEXPECTED: script parameter accepted: {result}")
            return False
        except TypeError as e:
            print(f"✅ EXPECTED FAILURE: script parameter rejected: {e}")
            return True

    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False

def main():
    """Run minimal failing tests to prove our approach."""
    print("🎯 Minimal Failing Test Suite")
    print("=" * 40)
    print("These tests SHOULD fail against current implementation!\n")

    tests = [
        ("WebContent Script Fields", test_webcontent_script_fields),
        ("Enhanced API Signature", test_enhanced_api_signature),
        ("Comprehensive Scenario", test_mock_comprehensive_scenario),
        ("Batch Processing", test_batch_scenario)
    ]

    results = []
    for name, test_func in tests:
        print(f"\n{'='*50}")
        print(f"Running: {name}")
        print('='*50)

        try:
            success = test_func()
            results.append((name, success, None))
        except Exception as e:
            results.append((name, False, str(e)))
            print(f"❌ Test crashed: {e}")

    print(f"\n{'='*50}")
    print("TEST RESULTS SUMMARY")
    print('='*50)

    expected_failures = 0
    unexpected_results = 0

    for name, success, error in results:
        if success:
            print(f"✅ {name}: FAILED AS EXPECTED")
            expected_failures += 1
        else:
            print(f"❌ {name}: UNEXPECTED RESULT")
            unexpected_results += 1
            if error:
                print(f"   Error: {error}")

    print(f"\n📊 Results:")
    print(f"   Expected failures: {expected_failures}/{len(tests)}")
    print(f"   Unexpected results: {unexpected_results}/{len(tests)}")

    if expected_failures == len(tests):
        print(f"\n🎉 PERFECT! All tests failed as expected!")
        print(f"✅ This proves our test suite will catch missing functionality")
        print(f"✅ When we implement the enhancements, these tests will guide us")
        print(f"✅ Test-driven development approach validated!")
        return 0
    else:
        print(f"\n⚠️  Some tests didn't behave as expected")
        print(f"❓ This might indicate some functionality already exists")
        return 1

if __name__ == "__main__":
    exit_code = main()
    print(f"\nTest suite exit code: {exit_code}")
    exit(exit_code)