pypi-query-mcp/demo_comparison.py
Ryan Malloy 530d1ba51b feat: improve get_top_downloaded_packages with robust fallback system
- Add curated popular packages database with 100+ packages
- Implement GitHub API integration for real-time popularity metrics
- Create multi-tier fallback strategy (live API -> curated -> enhanced)
- Add period scaling and realistic download estimates
- Provide rich metadata with categories and descriptions
2025-08-15 11:54:08 -06:00

116 lines
5.2 KiB
Python

#!/usr/bin/env python3
"""Demo comparing old vs new get_top_packages_by_downloads implementation."""
import asyncio
import sys
import os
# Make the in-repo ``pypi_query_mcp`` package importable no matter where the
# demo is launched from. The directory is derived from this file's location
# rather than a machine-specific checkout path, which only ever existed on the
# original author's machine.
# NOTE(review): assumes the package directory sits next to this script - confirm.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
if PROJECT_ROOT not in sys.path:
    # Prepend so the local checkout wins over any installed copy of the package.
    sys.path.insert(0, PROJECT_ROOT)
def _format_count(value):
    """Render *value* for display, with thousands separators when numeric.

    ``None`` becomes ``'N/A'`` and any other non-numeric value is shown via
    ``str()``, so an unexpected payload can never make the ``,`` format
    specifier raise and abort the demo.
    """
    if value is None:
        return 'N/A'
    if isinstance(value, (int, float)):
        return f"{value:,}"
    return str(value)


def _average_downloads(packages):
    """Return the floor-averaged ``downloads`` across *packages* (0 if empty)."""
    # A missing or ``None`` download count contributes 0 instead of raising.
    total = sum(pkg.get('downloads') or 0 for pkg in packages)
    return total // max(len(packages), 1)


async def demo_improvements():
    """Demonstrate the improvements made to get_top_packages_by_downloads.

    Prints a static problem/solution summary, then calls the tool with several
    periods and limits and reports what came back. Exceptions raised while
    exercising the tool are reported (message plus traceback) instead of being
    propagated; a failure to import the tool itself is not caught.
    """
    print("🚀 PyPI Top Packages Tool - Improvement Demonstration")
    print("=" * 60)

    print("\n📋 PROBLEM ANALYSIS:")
    print("- Original implementation relied solely on pypistats.org API")
    print("- When API returns 502 errors (as currently), tool returns empty results")
    print("- No fallback mechanism for reliability")
    print("- Limited package data and context")

    print("\n🔧 SOLUTION IMPLEMENTED:")
    print("✅ Multi-tier fallback strategy:")
    print(" 1. Try real PyPI download stats from pypistats.org")
    print(" 2. Fall back to curated popular packages database")
    print(" 3. Enhance with real-time GitHub popularity metrics")
    print(" 4. Always return meaningful results")
    print("✅ Comprehensive curated database:")
    print(" - 100+ popular packages across categories")
    print(" - Realistic download estimates based on historical data")
    print(" - Package metadata (category, description, use case)")
    print("✅ GitHub API integration:")
    print(" - Real-time star counts and repository metrics")
    print(" - Popularity-based download estimate adjustments")
    print(" - Additional metadata (language, topics, activity)")
    print("✅ Robust error handling:")
    print(" - Graceful degradation when APIs fail")
    print(" - Intelligent caching for performance")
    print(" - Detailed methodology reporting")

    # Imported here (after the path setup at module level has run) and outside
    # the ``try`` below, so a broken installation surfaces as a real ImportError.
    from pypi_query_mcp.tools.download_stats import get_top_packages_by_downloads

    print("\n🧪 TESTING IMPROVED IMPLEMENTATION:")
    print("-" * 40)
    try:
        # Test with current API state (likely failing)
        result = await get_top_packages_by_downloads('month', 8)
        packages = result.get('top_packages', [])
        print(f"✅ SUCCESS! Returned {len(packages)} packages")
        print(f"📊 Data source: {result.get('data_source')}")
        print(f"🔬 Methodology: {result.get('methodology')}")

        print("\n📦 Top 5 packages:")
        for rank, pkg in enumerate(packages[:5], start=1):
            stars = pkg.get('github_stars', 'N/A')
            estimated = ' (estimated)' if pkg.get('estimated', False) else ' (real stats)'
            github_enhanced = ' 🌟' if pkg.get('github_enhanced', False) else ''
            print(f" {rank}. {pkg.get('package', 'N/A')}")
            print(f" Downloads: {_format_count(pkg.get('downloads', 0))}{estimated}{github_enhanced}")
            print(f" Category: {pkg.get('category', 'N/A')}")
            # A key that is present but ``None`` means "no star data", same as
            # a missing key; formatting it with ``,`` used to abort the demo.
            if stars is not None and stars != 'N/A':
                print(f" GitHub: {_format_count(stars)} stars")
            print()

        print("\n🔄 TESTING DIFFERENT SCENARIOS:")
        print("-" * 30)

        # Test different periods
        periods_test = {}
        for period in ('day', 'week', 'month'):
            result = await get_top_packages_by_downloads(period, 3)
            packages = result.get('top_packages', [])
            avg_downloads = _average_downloads(packages)
            periods_test[period] = avg_downloads
            print(f"{period}: {len(packages)} packages, avg downloads: {_format_count(avg_downloads)}")

        # Verify period scaling makes sense; report the failing case too so a
        # regression is visible instead of silently printing nothing.
        if periods_test['day'] < periods_test['week'] < periods_test['month']:
            print("✅ Period scaling works correctly (day < week < month)")
        else:
            print("⚠️ Period scaling looks inconsistent (expected day < week < month)")

        # Test different limits
        for limit in (5, 15, 25):
            result = await get_top_packages_by_downloads('month', limit)
            packages = result.get('top_packages', [])
            real_count = sum(1 for p in packages if not p.get('estimated', False))
            github_count = sum(1 for p in packages if 'github_stars' in p)
            print(f"✅ Limit {limit}: {len(packages)} packages ({real_count} real, {github_count} GitHub-enhanced)")

        print("\n🎯 KEY IMPROVEMENTS ACHIEVED:")
        print("✅ 100% reliability - always returns results even when APIs fail")
        print("✅ Rich metadata - category, description, GitHub stats")
        print("✅ Realistic estimates - based on historical patterns")
        print("✅ Performance - intelligent caching and concurrent requests")
        print("✅ Transparency - clear methodology and data source reporting")
        print("✅ Scalability - supports different periods and limits")

        print("\n🏆 CONCLUSION:")
        print("The improved get_top_packages_by_downloads tool now provides")
        print("reliable, informative results even when external APIs fail,")
        print("making it suitable for production use with robust fallbacks.")
    except Exception as e:
        # Top-level boundary of a demo script: report and keep the exit clean.
        print(f"❌ Error during testing: {e}")
        import traceback
        traceback.print_exc()
if __name__ == '__main__':
    # Script entry point: build the demo coroutine and drive it to completion
    # on a fresh event loop.
    demo = demo_improvements()
    asyncio.run(demo)