pypi-query-mcp/demo_comparison.py

#!/usr/bin/env python3
"""Demo comparing old vs new get_top_packages_by_downloads implementation."""

import asyncio
import sys
import os

# Add the package to Python path
sys.path.insert(0, '/tmp/a/improve-top-packages')

async def demo_improvements():
    """Demonstrate the improvements made to get_top_packages_by_downloads."""

    print("🚀 PyPI Top Packages Tool - Improvement Demonstration")
    print("=" * 60)

    print("\n📋 PROBLEM ANALYSIS:")
    print("- Original implementation relied solely on pypistats.org API")
    print("- When API returns 502 errors (as currently), tool returns empty results")
    print("- No fallback mechanism for reliability")
    print("- Limited package data and context")

    print("\n🔧 SOLUTION IMPLEMENTED:")
    print("✅ Multi-tier fallback strategy:")
    print("  1. Try real PyPI download stats from pypistats.org")
    print("  2. Fall back to curated popular packages database")
    print("  3. Enhance with real-time GitHub popularity metrics")
    print("  4. Always return meaningful results")

    print("✅ Comprehensive curated database:")
    print("  - 100+ popular packages across categories")
    print("  - Realistic download estimates based on historical data")
    print("  - Package metadata (category, description, use case)")

    print("✅ GitHub API integration:")
    print("  - Real-time star counts and repository metrics")
    print("  - Popularity-based download estimate adjustments")
    print("  - Additional metadata (language, topics, activity)")

    print("✅ Robust error handling:")
    print("  - Graceful degradation when APIs fail")
    print("  - Intelligent caching for performance")
    print("  - Detailed methodology reporting")

    # Import and test the improved function
    from pypi_query_mcp.tools.download_stats import get_top_packages_by_downloads

    print("\n🧪 TESTING IMPROVED IMPLEMENTATION:")
    print("-" * 40)

    try:
        # Test with current API state (likely failing)
        result = await get_top_packages_by_downloads('month', 8)

        print(f"✅ SUCCESS! Returned {len(result.get('top_packages', []))} packages")
        print(f"📊 Data source: {result.get('data_source')}")
        print(f"🔬 Methodology: {result.get('methodology')}")

        print(f"\n📦 Top 5 packages:")
        for i, pkg in enumerate(result.get('top_packages', [])[:5]):
            downloads = pkg.get('downloads', 0)
            stars = pkg.get('github_stars', 'N/A')
            category = pkg.get('category', 'N/A')
            estimated = ' (estimated)' if pkg.get('estimated', False) else ' (real stats)'
            github_enhanced = ' 🌟' if pkg.get('github_enhanced', False) else ''

            print(f"  {i+1}. {pkg.get('package', 'N/A')}")
            print(f"     Downloads: {downloads:,}{estimated}{github_enhanced}")
            print(f"     Category: {category}")
            if stars != 'N/A':
                print(f"     GitHub: {stars:,} stars")
            print()

        print("\n🔄 TESTING DIFFERENT SCENARIOS:")
        print("-" * 30)

        # Test different periods
        periods_test = {}
        for period in ['day', 'week', 'month']:
            result = await get_top_packages_by_downloads(period, 3)
            avg_downloads = sum(p.get('downloads', 0) for p in result.get('top_packages', [])) // max(len(result.get('top_packages', [])), 1)
            periods_test[period] = avg_downloads
            print(f"✅ {period}: {len(result.get('top_packages', []))} packages, avg downloads: {avg_downloads:,}")

        # Verify period scaling makes sense
        if periods_test['day'] < periods_test['week'] < periods_test['month']:
            print("✅ Period scaling works correctly (day < week < month)")

        # Test different limits
        for limit in [5, 15, 25]:
            result = await get_top_packages_by_downloads('month', limit)
            packages = result.get('top_packages', [])
            real_count = len([p for p in packages if not p.get('estimated', False)])
            github_count = len([p for p in packages if 'github_stars' in p])
            print(f"✅ Limit {limit}: {len(packages)} packages ({real_count} real, {github_count} GitHub-enhanced)")

        print("\n🎯 KEY IMPROVEMENTS ACHIEVED:")
        print("✅ 100% reliability - always returns results even when APIs fail")
        print("✅ Rich metadata - category, description, GitHub stats")
        print("✅ Realistic estimates - based on historical patterns")
        print("✅ Performance - intelligent caching and concurrent requests")
        print("✅ Transparency - clear methodology and data source reporting")
        print("✅ Scalability - supports different periods and limits")

        print(f"\n🏆 CONCLUSION:")
        print("The improved get_top_packages_by_downloads tool now provides")
        print("reliable, informative results even when external APIs fail,")
        print("making it suitable for production use with robust fallbacks.")

    except Exception as e:
        print(f"❌ Error during testing: {e}")
        import traceback
        traceback.print_exc()

if __name__ == '__main__':
    asyncio.run(demo_improvements())