crawailer/examples/basic_usage.py
Crawailer Developer 7634f9fc32 Initial commit: JavaScript API enhancement preparation
- Comprehensive test suite (700+ lines) for JS execution in high-level API
- Test coverage analysis and validation infrastructure
- Enhancement proposal and implementation strategy
- Mock HTTP server with realistic JavaScript scenarios
- Parallel implementation strategy using expert agents and git worktrees

Ready for test-driven implementation of JavaScript enhancements.
2025-09-14 21:22:30 -06:00

122 lines
3.5 KiB
Python

"""
Basic usage examples for Crawailer.
This demonstrates the main API functions and typical workflows.
"""
import asyncio
import crawailer as web
async def basic_example():
    """Fetch one page with ``web.get`` and print a summary of the result.

    Prints the title, word count, reading time, quality score and content
    type of the fetched page, followed by the first 200 characters of its
    markdown rendering.
    """
    banner_rule = "=" * 50
    print("🕷️ Basic Crawailer Example")
    print(banner_rule)

    # Extract a single page through the high-level API.
    print("\n1. Single page extraction:")
    page = await web.get("https://example.com")

    print(f" Title: {page.title}")
    print(f" Word count: {page.word_count}")
    print(f" Reading time: {page.reading_time}")
    print(f" Quality score: {page.quality_score:.1f}/10")
    print(f" Content type: {page.content_type}")

    # Only the leading 200 characters of the markdown are shown.
    print("\n Markdown preview:")
    preview = page.markdown[:200]
    print(f" {preview}...")
async def batch_example():
    """Fetch several URLs with ``web.get_many`` and report each outcome.

    Prints how many results came back, then one numbered line per result:
    the title and word count for a truthy entry, or a failure notice for a
    falsy one.
    """
    print("\n2. Batch processing:")

    targets = [
        "https://example.com",
        "https://httpbin.org/html",
        # Expected to yield different content from the two pages above.
        "https://httpbin.org/json",
    ]
    fetched = await web.get_many(targets, max_concurrent=3)
    print(f" Processed {len(fetched)} URLs")

    # Number the report lines from 1 for readability.
    for position, item in enumerate(fetched, start=1):
        if not item:
            print(f" {position}. Failed to fetch")
            continue
        print(f" {position}. {item.title} ({item.word_count} words)")
async def discovery_example():
    """Call ``web.discover`` for a topic and list the titles it returns.

    The original author marks discovery as a placeholder implementation, so
    a ``NotImplementedError`` raised anywhere in the attempt is caught and
    reported as "coming soon" instead of propagating.
    """
    print("\n3. Content discovery:")
    topic = "web crawling"
    try:
        # Placeholder feature: may not be implemented yet.
        sources = await web.discover(topic, max_pages=3)
        print(f" Found {len(sources)} relevant sources")
        for source in sources:
            print(f" - {source.title}")
    except NotImplementedError:
        print(" Discovery feature coming soon!")
async def context_manager_example():
    """Drive a ``Browser`` directly through ``async with`` and fetch one page.

    Builds a headless ``BrowserConfig``, opens a ``Browser`` with it, fetches
    a page with an explicit per-call timeout, and prints the URL, status and
    load time taken from the returned mapping.
    """
    print("\n4. Advanced browser control:")

    # Imported here so the example shows exactly which names it needs.
    from crawailer import Browser, BrowserConfig

    # NOTE(review): the config timeout (15000) and the per-fetch timeout (10)
    # look like they use different units — confirm against the Browser API.
    async with Browser(BrowserConfig(headless=True, timeout=15000)) as session:
        # Fetch with an explicit per-call timeout.
        fetched = await session.fetch_page(
            "https://httpbin.org/delay/1",
            timeout=10,
        )
        print(f" Fetched: {fetched['url']}")
        print(f" Status: {fetched['status']}")
        print(f" Load time: {fetched['load_time']:.2f}s")
async def content_analysis_example():
    """Fetch a page with ``web.get`` and print metadata about its content.

    Prints the first 16 characters of the content hash, the language, and
    the number of links and images. When at least one link exists, the text
    and URL of the first link are printed as well.
    """
    print("\n5. Content analysis:")
    page = await web.get("https://httpbin.org/html")

    print(f" Content hash: {page.content_hash[:16]}...")
    print(f" Language: {page.language}")
    print(f" Links found: {len(page.links)}")
    print(f" Images found: {len(page.images)}")

    # Nothing more to show when the page has no links.
    if not page.links:
        return
    first_link = page.links[0]
    print(f" First link: {first_link['text']} -> {first_link['url']}")
async def main():
    """Run every example in order, then release Crawailer's resources.

    Any ``Exception`` raised by an example stops the remaining examples and
    is printed rather than propagated. ``web.cleanup()`` is awaited in the
    ``finally`` clause, so it runs whether or not an example failed.
    """
    examples = (
        basic_example,
        batch_example,
        discovery_example,
        context_manager_example,
        content_analysis_example,
    )
    try:
        for run_example in examples:
            await run_example()
        print("\n✅ All examples completed successfully!")
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
    finally:
        # Clean up global resources.
        await web.cleanup()
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())