
"""
|
|
Basic usage examples for Crawailer.
|
|
|
|
This demonstrates the main API functions and typical workflows.
|
|
"""
|
|
|
|
import asyncio
|
|
import crawailer as web
|
|
|
|
|
|
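
# The high-level helpers used below (web.get, web.get_many, web.discover)
# are all coroutines, so each example is an async function and the script
# is driven by a single asyncio.run() call at the bottom.

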
async def basic_example():
    """Basic content extraction from a single URL."""
    print("🕷️ Basic Crawailer Example")
    print("=" * 50)

    # Simple content extraction
    print("\n1. Single page extraction:")
    content = await web.get("https://example.com")

    print(f" Title: {content.title}")
    print(f" Word count: {content.word_count}")
    print(f" Reading time: {content.reading_time}")
    print(f" Quality score: {content.quality_score:.1f}/10")
    print(f" Content type: {content.content_type}")

    # Show first 200 characters of markdown
    print("\n Markdown preview:")
    print(f" {content.markdown[:200]}...")


async def batch_example():
    """Batch processing multiple URLs."""
    print("\n2. Batch processing:")

    urls = [
        "https://example.com",
        "https://httpbin.org/html",
        "https://httpbin.org/json",  # This will be different content
    ]

    results = await web.get_many(urls, max_concurrent=3)

    print(f" Processed {len(results)} URLs")
    for i, result in enumerate(results):
        if result:
            print(f" {i+1}. {result.title} ({result.word_count} words)")
        else:
            print(f" {i+1}. Failed to fetch")
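
# Note: the truthiness check above implies that get_many() returns a falsy
# placeholder (e.g. None) for pages that failed rather than raising, so a
# partial batch still yields whatever succeeded.

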
async def discovery_example():
    """Content discovery (placeholder implementation)."""
    print("\n3. Content discovery:")

    try:
        # Note: This is a placeholder implementation
        results = await web.discover("web crawling", max_pages=3)
        print(f" Found {len(results)} relevant sources")

        for result in results:
            print(f" - {result.title}")

    except NotImplementedError:
        print(" Discovery feature coming soon!")


async def context_manager_example():
    """Using the browser as a context manager for more control."""
    print("\n4. Advanced browser control:")

    from crawailer import Browser, BrowserConfig

    config = BrowserConfig(headless=True, timeout=15000)

    async with Browser(config) as browser:
        # Fetch with a custom per-request timeout
        page_data = await browser.fetch_page(
            "https://httpbin.org/delay/1",
            timeout=10,
        )

        print(f" Fetched: {page_data['url']}")
        print(f" Status: {page_data['status']}")
        print(f" Load time: {page_data['load_time']:.2f}s")
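
# Caution: BrowserConfig(timeout=15000) is presumably milliseconds while
# fetch_page(..., timeout=10) reads like seconds; confirm the units in the
# Crawailer docs before reusing these values.

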
async def content_analysis_example():
    """Analyzing extracted content."""
    print("\n5. Content analysis:")

    content = await web.get("https://httpbin.org/html")

    print(f" Content hash: {content.content_hash[:16]}...")
    print(f" Language: {content.language}")
    print(f" Links found: {len(content.links)}")
    print(f" Images found: {len(content.images)}")

    if content.links:
        print(f" First link: {content.links[0]['text']} -> {content.links[0]['url']}")


async def main():
    """Run all examples."""
    try:
        await basic_example()
        await batch_example()
        await discovery_example()
        await context_manager_example()
        await content_analysis_example()

        print("\n✅ All examples completed successfully!")

    except Exception as e:
        print(f"\n❌ Error: {e}")

    finally:
        # Clean up global resources
        await web.cleanup()


if __name__ == "__main__":
|
|
asyncio.run(main()) |