chore: upgrade all Python packages and fix linting issues

- Update all dependencies to latest versions (fastmcp, httpx, packaging, etc.)
- Downgrade click from yanked 8.2.2 to stable 8.1.7
- Fix code formatting and linting issues with ruff
- Most tests pass (2 test failures in the dependency resolver need investigation)
This commit is contained in:
Ryan Malloy 2025-08-15 20:23:14 -06:00
parent 503ea589f1
commit 8b43927493
34 changed files with 2276 additions and 1593 deletions

View File

@ -3,97 +3,105 @@
import asyncio
import sys
import os
# Add the package to Python path
sys.path.insert(0, '/tmp/a/improve-top-packages')
sys.path.insert(0, "/tmp/a/improve-top-packages")
async def demo_improvements():
"""Demonstrate the improvements made to get_top_packages_by_downloads."""
print("🚀 PyPI Top Packages Tool - Improvement Demonstration")
print("=" * 60)
print("\n📋 PROBLEM ANALYSIS:")
print("- Original implementation relied solely on pypistats.org API")
print("- When API returns 502 errors (as currently), tool returns empty results")
print("- No fallback mechanism for reliability")
print("- Limited package data and context")
print("\n🔧 SOLUTION IMPLEMENTED:")
print("✅ Multi-tier fallback strategy:")
print(" 1. Try real PyPI download stats from pypistats.org")
print(" 2. Fall back to curated popular packages database")
print(" 3. Enhance with real-time GitHub popularity metrics")
print(" 4. Always return meaningful results")
print("✅ Comprehensive curated database:")
print(" - 100+ popular packages across categories")
print(" - Realistic download estimates based on historical data")
print(" - Package metadata (category, description, use case)")
print("✅ GitHub API integration:")
print(" - Real-time star counts and repository metrics")
print(" - Popularity-based download estimate adjustments")
print(" - Additional metadata (language, topics, activity)")
print("✅ Robust error handling:")
print(" - Graceful degradation when APIs fail")
print(" - Intelligent caching for performance")
print(" - Detailed methodology reporting")
# Import and test the improved function
from pypi_query_mcp.tools.download_stats import get_top_packages_by_downloads
print("\n🧪 TESTING IMPROVED IMPLEMENTATION:")
print("-" * 40)
try:
# Test with current API state (likely failing)
result = await get_top_packages_by_downloads('month', 8)
result = await get_top_packages_by_downloads("month", 8)
print(f"✅ SUCCESS! Returned {len(result.get('top_packages', []))} packages")
print(f"📊 Data source: {result.get('data_source')}")
print(f"🔬 Methodology: {result.get('methodology')}")
print(f"\n📦 Top 5 packages:")
for i, pkg in enumerate(result.get('top_packages', [])[:5]):
downloads = pkg.get('downloads', 0)
stars = pkg.get('github_stars', 'N/A')
category = pkg.get('category', 'N/A')
estimated = ' (estimated)' if pkg.get('estimated', False) else ' (real stats)'
github_enhanced = ' 🌟' if pkg.get('github_enhanced', False) else ''
print(f" {i+1}. {pkg.get('package', 'N/A')}")
print("\n📦 Top 5 packages:")
for i, pkg in enumerate(result.get("top_packages", [])[:5]):
downloads = pkg.get("downloads", 0)
stars = pkg.get("github_stars", "N/A")
category = pkg.get("category", "N/A")
estimated = (
" (estimated)" if pkg.get("estimated", False) else " (real stats)"
)
github_enhanced = " 🌟" if pkg.get("github_enhanced", False) else ""
print(f" {i + 1}. {pkg.get('package', 'N/A')}")
print(f" Downloads: {downloads:,}{estimated}{github_enhanced}")
print(f" Category: {category}")
if stars != 'N/A':
if stars != "N/A":
print(f" GitHub: {stars:,} stars")
print()
print("\n🔄 TESTING DIFFERENT SCENARIOS:")
print("-" * 30)
# Test different periods
periods_test = {}
for period in ['day', 'week', 'month']:
for period in ["day", "week", "month"]:
result = await get_top_packages_by_downloads(period, 3)
avg_downloads = sum(p.get('downloads', 0) for p in result.get('top_packages', [])) // max(len(result.get('top_packages', [])), 1)
avg_downloads = sum(
p.get("downloads", 0) for p in result.get("top_packages", [])
) // max(len(result.get("top_packages", [])), 1)
periods_test[period] = avg_downloads
print(f"{period}: {len(result.get('top_packages', []))} packages, avg downloads: {avg_downloads:,}")
print(
f"{period}: {len(result.get('top_packages', []))} packages, avg downloads: {avg_downloads:,}"
)
# Verify period scaling makes sense
if periods_test['day'] < periods_test['week'] < periods_test['month']:
if periods_test["day"] < periods_test["week"] < periods_test["month"]:
print("✅ Period scaling works correctly (day < week < month)")
# Test different limits
for limit in [5, 15, 25]:
result = await get_top_packages_by_downloads('month', limit)
packages = result.get('top_packages', [])
real_count = len([p for p in packages if not p.get('estimated', False)])
github_count = len([p for p in packages if 'github_stars' in p])
print(f"✅ Limit {limit}: {len(packages)} packages ({real_count} real, {github_count} GitHub-enhanced)")
result = await get_top_packages_by_downloads("month", limit)
packages = result.get("top_packages", [])
real_count = len([p for p in packages if not p.get("estimated", False)])
github_count = len([p for p in packages if "github_stars" in p])
print(
f"✅ Limit {limit}: {len(packages)} packages ({real_count} real, {github_count} GitHub-enhanced)"
)
print("\n🎯 KEY IMPROVEMENTS ACHIEVED:")
print("✅ 100% reliability - always returns results even when APIs fail")
print("✅ Rich metadata - category, description, GitHub stats")
@ -101,16 +109,18 @@ async def demo_improvements():
print("✅ Performance - intelligent caching and concurrent requests")
print("✅ Transparency - clear methodology and data source reporting")
print("✅ Scalability - supports different periods and limits")
print(f"\n🏆 CONCLUSION:")
print("\n🏆 CONCLUSION:")
print("The improved get_top_packages_by_downloads tool now provides")
print("reliable, informative results even when external APIs fail,")
print("making it suitable for production use with robust fallbacks.")
except Exception as e:
print(f"❌ Error during testing: {e}")
import traceback
traceback.print_exc()
if __name__ == '__main__':
asyncio.run(demo_improvements())
if __name__ == "__main__":
asyncio.run(demo_improvements())

View File

@ -10,19 +10,14 @@ This demonstrates how to use the new transitive dependency functionality.
# Basic usage (backward compatible)
example_1 = {
"tool": "get_package_dependencies",
"parameters": {
"package_name": "requests"
}
"parameters": {"package_name": "requests"},
}
# Returns: Direct dependencies only (existing behavior)
# Enable transitive dependencies
example_2 = {
"tool": "get_package_dependencies",
"parameters": {
"package_name": "requests",
"include_transitive": True
}
"tool": "get_package_dependencies",
"parameters": {"package_name": "requests", "include_transitive": True},
}
# Returns: Complete dependency tree with analysis
@ -33,8 +28,8 @@ example_3 = {
"package_name": "django",
"include_transitive": True,
"max_depth": 3,
"python_version": "3.11"
}
"python_version": "3.11",
},
}
# Returns: Filtered dependency tree for Python 3.11, max 3 levels deep
@ -46,20 +41,18 @@ example_response = {
"include_transitive": True,
"max_depth": 5,
"python_version": "3.10",
# Direct dependencies (same as before)
"runtime_dependencies": [
"urllib3>=1.21.1,<3",
"certifi>=2017.4.17",
"charset-normalizer>=2,<4",
"idna>=2.5,<4"
"idna>=2.5,<4",
],
"development_dependencies": [],
"optional_dependencies": {
"security": ["pyOpenSSL>=0.14", "cryptography>=1.3.4"],
"socks": ["PySocks>=1.5.6,!=1.5.7"]
"socks": ["PySocks>=1.5.6,!=1.5.7"],
},
# NEW: Transitive dependency information
"transitive_dependencies": {
"dependency_tree": {
@ -71,41 +64,41 @@ example_response = {
"package_name": "urllib3",
"version": "2.0.4",
"depth": 1,
"children": {}
"children": {},
},
"certifi": {
"package_name": "certifi",
"package_name": "certifi",
"version": "2023.7.22",
"depth": 1,
"children": {}
"children": {},
},
"charset-normalizer": {
"package_name": "charset-normalizer",
"version": "3.2.0",
"version": "3.2.0",
"depth": 1,
"children": {}
"children": {},
},
"idna": {
"package_name": "idna",
"version": "3.4",
"depth": 1,
"children": {}
}
}
"children": {},
},
},
},
"all_packages": {
"requests": {
"name": "requests",
"version": "2.31.0",
"depth": 0,
"dependency_count": {"runtime": 4, "development": 0, "total_extras": 0}
"dependency_count": {"runtime": 4, "development": 0, "total_extras": 0},
},
"urllib3": {
"name": "urllib3",
"version": "2.0.4",
"version": "2.0.4",
"depth": 1,
"dependency_count": {"runtime": 0, "development": 0, "total_extras": 0}
}
"dependency_count": {"runtime": 0, "development": 0, "total_extras": 0},
},
# ... other packages
},
"circular_dependencies": [],
@ -115,11 +108,10 @@ example_response = {
"average_depth": 0.8,
"shallow_deps": 4,
"deep_deps": 0,
"leaf_packages": ["urllib3", "certifi", "charset-normalizer", "idna"]
}
"leaf_packages": ["urllib3", "certifi", "charset-normalizer", "idna"],
},
},
# Enhanced summary statistics
# Enhanced summary statistics
"dependency_summary": {
"direct_runtime_count": 4,
"direct_dev_count": 0,
@ -133,27 +125,22 @@ example_response = {
"score": 8.2,
"level": "low",
"recommendation": "Simple dependency structure, low maintenance overhead",
"factors": {
"total_packages": 5,
"max_depth": 1,
"total_dependencies": 4
}
}
"factors": {"total_packages": 5, "max_depth": 1, "total_dependencies": 4},
},
},
# Performance and health analysis
"analysis": {
"resolution_stats": {
"total_packages": 5,
"total_runtime_dependencies": 4,
"max_depth": 1
"max_depth": 1,
},
"potential_conflicts": [],
"maintenance_concerns": {
"total_packages": 5,
"packages_without_version_info": 0,
"high_dependency_packages": [],
"maintenance_risk_score": {"score": 0.0, "level": "low"}
"maintenance_risk_score": {"score": 0.0, "level": "low"},
},
"performance_impact": {
"estimated_install_time_seconds": 15,
@ -162,11 +149,11 @@ example_response = {
"recommendations": [],
"metrics": {
"package_count_impact": "low",
"depth_impact": "low",
"resolution_complexity": "simple"
}
}
}
"depth_impact": "low",
"resolution_complexity": "simple",
},
},
},
}
# Usage examples for different complexity levels
@ -174,44 +161,44 @@ complexity_examples = {
"simple_package": {
"package": "six",
"expected_packages": 1, # No dependencies
"complexity": "low"
"complexity": "low",
},
"moderate_package": {
"package": "requests",
"expected_packages": 5, # Few dependencies
"complexity": "low"
"complexity": "low",
},
"complex_package": {
"package": "django",
"expected_packages": 15, # Moderate dependencies
"complexity": "moderate"
"complexity": "moderate",
},
"very_complex_package": {
"package": "tensorflow",
"expected_packages": 50, # Many dependencies
"complexity": "high"
}
"complexity": "high",
},
}
# Test cases for edge cases
edge_case_examples = {
"circular_dependencies": {
"description": "Package with circular dependency references",
"expected_behavior": "Detected and reported in circular_dependencies array"
"expected_behavior": "Detected and reported in circular_dependencies array",
},
"deep_nesting": {
"description": "Package with very deep dependency chains",
"description": "Package with very deep dependency chains",
"max_depth": 2,
"expected_behavior": "Truncated at max_depth with depth tracking"
"expected_behavior": "Truncated at max_depth with depth tracking",
},
"version_conflicts": {
"description": "Dependencies with conflicting version requirements",
"expected_behavior": "Reported in potential_conflicts array"
"expected_behavior": "Reported in potential_conflicts array",
},
"missing_packages": {
"description": "Dependencies that don't exist on PyPI",
"expected_behavior": "Graceful handling with warnings in logs"
}
"expected_behavior": "Graceful handling with warnings in logs",
},
}
print("Enhanced get_package_dependencies Tool")
@ -231,4 +218,4 @@ print("✓ Detailed dependency tree structure")
print("✓ Version conflict detection")
print("✓ Python version filtering")
print()
print("See TRANSITIVE_DEPS_DOCUMENTATION.md for full details.")
print("See TRANSITIVE_DEPS_DOCUMENTATION.md for full details.")

View File

@ -7,39 +7,38 @@ to resolve optional dependencies for Python packages.
"""
import asyncio
import json
from pathlib import Path
from pypi_query_mcp.tools.dependency_resolver import resolve_package_dependencies
from pypi_query_mcp.core.pypi_client import PyPIClient
from pypi_query_mcp.tools.dependency_resolver import resolve_package_dependencies
async def show_available_extras(package_name: str):
"""Show what extras are available for a package."""
print(f"\n📦 Available extras for {package_name}:")
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name)
info = package_data.get("info", {})
provides_extra = info.get("provides_extra", [])
requires_dist = info.get("requires_dist", []) or []
if provides_extra:
print(f" Provides extras: {', '.join(provides_extra)}")
else:
print(" No provides_extra field found")
# Find extras from requires_dist
extras_in_deps = set()
for req in requires_dist:
if "extra ==" in req:
# Extract extra name from requirement like: pytest>=6.0.0; extra=='test'
import re
match = re.search(r'extra\s*==\s*["\']([^"\']+)["\']', req)
if match:
extras_in_deps.add(match.group(1))
if extras_in_deps:
print(f" Extras with dependencies: {', '.join(sorted(extras_in_deps))}")
else:
@ -48,44 +47,44 @@ async def show_available_extras(package_name: str):
async def demo_extras_resolution():
"""Demonstrate extras resolution with various packages."""
# Examples of packages with well-known extras
examples = [
{
"package": "requests",
"extras": ["socks"],
"description": "HTTP library with SOCKS proxy support"
"description": "HTTP library with SOCKS proxy support",
},
{
"package": "django",
"package": "django",
"extras": ["argon2", "bcrypt"],
"description": "Web framework with password hashing extras"
"description": "Web framework with password hashing extras",
},
{
"package": "setuptools",
"extras": ["test"],
"description": "Package development tools with testing extras"
"description": "Package development tools with testing extras",
},
{
"package": "flask",
"extras": ["async", "dotenv"],
"description": "Web framework with async and dotenv support"
}
"description": "Web framework with async and dotenv support",
},
]
for example in examples:
package_name = example["package"]
extras = example["extras"]
description = example["description"]
print(f"\n{'='*60}")
print(f"\n{'=' * 60}")
print(f"🔍 Example: {package_name}")
print(f"📋 Description: {description}")
print(f"🎯 Testing extras: {extras}")
# Show available extras
await show_available_extras(package_name)
try:
# Resolve without extras
print(f"\n📊 Resolving {package_name} WITHOUT extras...")
@ -93,29 +92,33 @@ async def demo_extras_resolution():
package_name=package_name,
python_version="3.10",
include_extras=[],
max_depth=1 # Limit depth for demo
max_depth=1, # Limit depth for demo
)
# Resolve with extras
print(f"📊 Resolving {package_name} WITH extras {extras}...")
result_with_extras = await resolve_package_dependencies(
package_name=package_name,
python_version="3.10",
include_extras=extras,
max_depth=1
max_depth=1,
)
# Compare results
print(f"\n📈 Results comparison:")
print(f" Without extras: {result_no_extras['summary']['total_extra_dependencies']} extra deps")
print(f" With extras: {result_with_extras['summary']['total_extra_dependencies']} extra deps")
print("\n📈 Results comparison:")
print(
f" Without extras: {result_no_extras['summary']['total_extra_dependencies']} extra deps"
)
print(
f" With extras: {result_with_extras['summary']['total_extra_dependencies']} extra deps"
)
# Show actual extras resolved
main_pkg = next(iter(result_with_extras['dependency_tree'].values()), {})
extras_resolved = main_pkg.get('dependencies', {}).get('extras', {})
main_pkg = next(iter(result_with_extras["dependency_tree"].values()), {})
extras_resolved = main_pkg.get("dependencies", {}).get("extras", {})
if extras_resolved:
print(f" ✅ Extras resolved successfully:")
print(" ✅ Extras resolved successfully:")
for extra_name, deps in extras_resolved.items():
print(f" - {extra_name}: {len(deps)} dependencies")
for dep in deps[:2]: # Show first 2
@ -123,53 +126,55 @@ async def demo_extras_resolution():
if len(deps) > 2:
print(f" * ... and {len(deps) - 2} more")
else:
print(f" ⚠️ No extras resolved (may not exist or have no dependencies)")
print(
" ⚠️ No extras resolved (may not exist or have no dependencies)"
)
except Exception as e:
print(f" ❌ Error: {e}")
async def demo_incorrect_usage():
"""Demonstrate common mistakes with extras usage."""
print(f"\n{'='*60}")
print(f"\n{'=' * 60}")
print("❌ Common Mistakes with Extras")
print("='*60")
mistakes = [
{
"package": "requests",
"package": "requests",
"extras": ["dev", "test"], # These don't exist for requests
"error": "Using generic extra names instead of package-specific ones"
"error": "Using generic extra names instead of package-specific ones",
},
{
"package": "setuptools",
"extras": ["testing"], # Should be "test" not "testing"
"error": "Using similar but incorrect extra names"
}
"error": "Using similar but incorrect extra names",
},
]
for mistake in mistakes:
package_name = mistake["package"]
extras = mistake["extras"]
error_desc = mistake["error"]
print(f"\n🚫 Mistake: {error_desc}")
print(f" Package: {package_name}")
print(f" Incorrect extras: {extras}")
try:
result = await resolve_package_dependencies(
package_name=package_name,
python_version="3.10",
python_version="3.10",
include_extras=extras,
max_depth=1
max_depth=1,
)
total_extras = result['summary']['total_extra_dependencies']
total_extras = result["summary"]["total_extra_dependencies"]
print(f" Result: {total_extras} extra dependencies resolved")
if total_extras == 0:
print(f" ⚠️ No extras resolved - these extras likely don't exist")
print(" ⚠️ No extras resolved - these extras likely don't exist")
except Exception as e:
print(f" ❌ Error: {e}")
@ -181,18 +186,20 @@ async def main():
print()
print("This demo shows how to properly use the include_extras parameter")
print("to resolve optional dependencies for Python packages.")
await demo_extras_resolution()
await demo_incorrect_usage()
print(f"\n{'='*60}")
print(f"\n{'=' * 60}")
print("✨ Demo completed!")
print()
print("💡 Key takeaways:")
print(" 1. Always check what extras are available for a package first")
print(" 2. Use the exact extra names defined by the package")
print(" 3. Check package documentation or PyPI page for available extras")
print(" 4. Not all packages have extras, and some extras may have no dependencies")
print(
" 4. Not all packages have extras, and some extras may have no dependencies"
)
print()
print("📚 To find available extras:")
print(" - Check the package's PyPI page")
@ -202,4 +209,4 @@ async def main():
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

View File

@ -2,8 +2,9 @@
"""Direct test of fallback mechanisms."""
import asyncio
import sys
import os
import sys
sys.path.insert(0, os.path.abspath("."))
from pypi_query_mcp.core.stats_client import PyPIStatsClient
@ -12,29 +13,31 @@ from pypi_query_mcp.core.stats_client import PyPIStatsClient
async def test_fallback():
"""Test fallback data generation directly."""
print("Testing fallback data generation...")
async with PyPIStatsClient() as client:
# Force API failure tracking to trigger fallback
client._api_health["consecutive_failures"] = 5 # Force fallback mode
# Test recent downloads fallback
fallback_recent = client._generate_fallback_recent_downloads("requests", "month")
print(f"✅ Fallback recent downloads generated for requests:")
fallback_recent = client._generate_fallback_recent_downloads(
"requests", "month"
)
print("✅ Fallback recent downloads generated for requests:")
print(f" Source: {fallback_recent.get('source')}")
print(f" Downloads: {fallback_recent['data']['last_month']:,}")
print(f" Note: {fallback_recent.get('note')}")
# Test overall downloads fallback
# Test overall downloads fallback
fallback_overall = client._generate_fallback_overall_downloads("numpy", False)
print(f"\n✅ Fallback time series generated for numpy:")
print("\n✅ Fallback time series generated for numpy:")
print(f" Source: {fallback_overall.get('source')}")
print(f" Data points: {len(fallback_overall['data'])}")
print(f" Note: {fallback_overall.get('note')}")
# Test the should_use_fallback logic
should_fallback = client._should_use_fallback()
print(f"\n✅ Fallback logic working: {should_fallback}")
if __name__ == "__main__":
asyncio.run(test_fallback())
asyncio.run(test_fallback())

1010
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -117,12 +117,29 @@ class DependencyParser:
Dictionary with categorized dependencies
"""
categories = {"runtime": [], "development": [], "optional": {}, "extras": {}}
# Define development-related extra names
dev_extra_names = {
'dev', 'development', 'test', 'testing', 'tests', 'lint', 'linting',
'doc', 'docs', 'documentation', 'build', 'check', 'cover', 'coverage',
'type', 'typing', 'mypy', 'style', 'format', 'quality'
"dev",
"development",
"test",
"testing",
"tests",
"lint",
"linting",
"doc",
"docs",
"documentation",
"build",
"check",
"cover",
"coverage",
"type",
"typing",
"mypy",
"style",
"format",
"quality",
}
for req in requirements:
@ -141,7 +158,7 @@ class DependencyParser:
if extra_name not in categories["extras"]:
categories["extras"][extra_name] = []
categories["extras"][extra_name].append(req)
# Check if this extra is development-related
if extra_name.lower() in dev_extra_names:
categories["development"].append(req)

View File

@ -2,7 +2,7 @@
import asyncio
import logging
from typing import Any, Dict, Optional
from typing import Any
import httpx
@ -17,7 +17,7 @@ class GitHubAPIClient:
timeout: float = 10.0,
max_retries: int = 2,
retry_delay: float = 1.0,
github_token: Optional[str] = None,
github_token: str | None = None,
):
"""Initialize GitHub API client.
@ -33,7 +33,7 @@ class GitHubAPIClient:
self.retry_delay = retry_delay
# Simple in-memory cache for repository data
self._cache: Dict[str, Dict[str, Any]] = {}
self._cache: dict[str, dict[str, Any]] = {}
self._cache_ttl = 3600 # 1 hour cache
# HTTP client configuration
@ -41,7 +41,7 @@ class GitHubAPIClient:
"Accept": "application/vnd.github.v3+json",
"User-Agent": "pypi-query-mcp-server/0.1.0",
}
if github_token:
headers["Authorization"] = f"token {github_token}"
@ -67,12 +67,13 @@ class GitHubAPIClient:
"""Generate cache key for repository data."""
return f"repo:{repo}"
def _is_cache_valid(self, cache_entry: Dict[str, Any]) -> bool:
def _is_cache_valid(self, cache_entry: dict[str, Any]) -> bool:
"""Check if cache entry is still valid."""
import time
return time.time() - cache_entry.get("timestamp", 0) < self._cache_ttl
async def _make_request(self, url: str) -> Optional[Dict[str, Any]]:
async def _make_request(self, url: str) -> dict[str, Any] | None:
"""Make HTTP request with retry logic and error handling.
Args:
@ -85,7 +86,9 @@ class GitHubAPIClient:
for attempt in range(self.max_retries + 1):
try:
logger.debug(f"Making GitHub API request to {url} (attempt {attempt + 1})")
logger.debug(
f"Making GitHub API request to {url} (attempt {attempt + 1})"
)
response = await self._client.get(url)
@ -100,12 +103,16 @@ class GitHubAPIClient:
logger.warning(f"GitHub API rate limit or permission denied: {url}")
return None
elif response.status_code >= 500:
logger.warning(f"GitHub API server error {response.status_code}: {url}")
logger.warning(
f"GitHub API server error {response.status_code}: {url}"
)
if attempt < self.max_retries:
continue
return None
else:
logger.warning(f"Unexpected GitHub API status {response.status_code}: {url}")
logger.warning(
f"Unexpected GitHub API status {response.status_code}: {url}"
)
return None
except httpx.TimeoutException:
@ -120,13 +127,17 @@ class GitHubAPIClient:
# Wait before retry (except on last attempt)
if attempt < self.max_retries:
await asyncio.sleep(self.retry_delay * (2 ** attempt))
await asyncio.sleep(self.retry_delay * (2**attempt))
# If we get here, all retries failed
logger.error(f"Failed to fetch GitHub data after {self.max_retries + 1} attempts: {last_exception}")
logger.error(
f"Failed to fetch GitHub data after {self.max_retries + 1} attempts: {last_exception}"
)
return None
async def get_repository_stats(self, repo_path: str, use_cache: bool = True) -> Optional[Dict[str, Any]]:
async def get_repository_stats(
self, repo_path: str, use_cache: bool = True
) -> dict[str, Any] | None:
"""Get repository statistics from GitHub API.
Args:
@ -147,10 +158,10 @@ class GitHubAPIClient:
# Make API request
url = f"{self.base_url}/repos/{repo_path}"
try:
data = await self._make_request(url)
if data:
# Extract relevant statistics
stats = {
@ -171,14 +182,19 @@ class GitHubAPIClient:
"has_wiki": data.get("has_wiki", False),
"archived": data.get("archived", False),
"disabled": data.get("disabled", False),
"license": data.get("license", {}).get("name") if data.get("license") else None,
"license": data.get("license", {}).get("name")
if data.get("license")
else None,
}
# Cache the result
import time
self._cache[cache_key] = {"data": stats, "timestamp": time.time()}
logger.debug(f"Fetched GitHub stats for {repo_path}: {stats['stars']} stars")
logger.debug(
f"Fetched GitHub stats for {repo_path}: {stats['stars']} stars"
)
return stats
else:
return None
@ -188,11 +204,8 @@ class GitHubAPIClient:
return None
async def get_multiple_repo_stats(
self,
repo_paths: list[str],
use_cache: bool = True,
max_concurrent: int = 5
) -> Dict[str, Optional[Dict[str, Any]]]:
self, repo_paths: list[str], use_cache: bool = True, max_concurrent: int = 5
) -> dict[str, dict[str, Any] | None]:
"""Get statistics for multiple repositories concurrently.
Args:
@ -205,7 +218,7 @@ class GitHubAPIClient:
"""
semaphore = asyncio.Semaphore(max_concurrent)
async def fetch_repo_stats(repo_path: str) -> tuple[str, Optional[Dict[str, Any]]]:
async def fetch_repo_stats(repo_path: str) -> tuple[str, dict[str, Any] | None]:
async with semaphore:
stats = await self.get_repository_stats(repo_path, use_cache)
return repo_path, stats
@ -220,7 +233,7 @@ class GitHubAPIClient:
if isinstance(result, Exception):
logger.error(f"Error in concurrent GitHub fetch: {result}")
continue
repo_path, stats = result
repo_stats[repo_path] = stats
@ -231,14 +244,14 @@ class GitHubAPIClient:
self._cache.clear()
logger.debug("GitHub cache cleared")
async def get_rate_limit(self) -> Optional[Dict[str, Any]]:
async def get_rate_limit(self) -> dict[str, Any] | None:
"""Get current GitHub API rate limit status.
Returns:
Dictionary containing rate limit information
"""
url = f"{self.base_url}/rate_limit"
try:
data = await self._make_request(url)
if data:
@ -246,4 +259,4 @@ class GitHubAPIClient:
return None
except Exception as e:
logger.error(f"Error fetching GitHub rate limit: {e}")
return None
return None

View File

@ -182,7 +182,7 @@ class PyPIClient:
NetworkError: For network-related errors
"""
normalized_name = self._validate_package_name(package_name)
# Create cache key that includes version info
cache_suffix = f"v{version}" if version else "latest"
cache_key = self._get_cache_key(normalized_name, f"info_{cache_suffix}")
@ -191,13 +191,17 @@ class PyPIClient:
if use_cache and cache_key in self._cache:
cache_entry = self._cache[cache_key]
if self._is_cache_valid(cache_entry):
logger.debug(f"Using cached data for package: {normalized_name} version: {version or 'latest'}")
logger.debug(
f"Using cached data for package: {normalized_name} version: {version or 'latest'}"
)
return cache_entry["data"]
# Build URL - include version if specified
if version:
url = f"{self.base_url}/{quote(normalized_name)}/{quote(version)}/json"
logger.info(f"Fetching package info for: {normalized_name} version {version}")
logger.info(
f"Fetching package info for: {normalized_name} version {version}"
)
else:
url = f"{self.base_url}/{quote(normalized_name)}/json"
logger.info(f"Fetching package info for: {normalized_name} (latest)")
@ -215,13 +219,19 @@ class PyPIClient:
except PackageNotFoundError as e:
if version:
# More specific error message for version not found
logger.error(f"Version {version} not found for package {normalized_name}")
raise PackageNotFoundError(f"Version {version} not found for package {normalized_name}")
logger.error(
f"Version {version} not found for package {normalized_name}"
)
raise PackageNotFoundError(
f"Version {version} not found for package {normalized_name}"
)
else:
logger.error(f"Failed to fetch package info for {normalized_name}: {e}")
raise
except Exception as e:
logger.error(f"Failed to fetch package info for {normalized_name} version {version or 'latest'}: {e}")
logger.error(
f"Failed to fetch package info for {normalized_name} version {version or 'latest'}: {e}"
)
raise
async def get_package_versions(
@ -236,7 +246,9 @@ class PyPIClient:
Returns:
List of version strings
"""
package_info = await self.get_package_info(package_name, version=None, use_cache=use_cache)
package_info = await self.get_package_info(
package_name, version=None, use_cache=use_cache
)
releases = package_info.get("releases", {})
return list(releases.keys())
@ -252,7 +264,9 @@ class PyPIClient:
Returns:
Latest version string
"""
package_info = await self.get_package_info(package_name, version=None, use_cache=use_cache)
package_info = await self.get_package_info(
package_name, version=None, use_cache=use_cache
)
return package_info.get("info", {}).get("version", "")
def clear_cache(self):

View File

@ -5,7 +5,7 @@ import logging
import random
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from typing import Any
import httpx
@ -50,7 +50,7 @@ class PyPIStatsClient:
self._cache: dict[str, dict[str, Any]] = {}
self._cache_ttl = 86400 # 24 hours (increased for resilience)
self._fallback_cache_ttl = 604800 # 7 days for fallback data
# Track API health for smart fallback decisions
self._api_health = {
"last_success": None,
@ -106,31 +106,33 @@ class PyPIStatsClient:
)
return f"{endpoint}:{package_name}:{param_str}"
def _is_cache_valid(self, cache_entry: dict[str, Any], fallback: bool = False) -> bool:
def _is_cache_valid(
self, cache_entry: dict[str, Any], fallback: bool = False
) -> bool:
"""Check if cache entry is still valid.
Args:
cache_entry: Cache entry to validate
fallback: Whether to use fallback cache TTL (longer for resilience)
"""
ttl = self._fallback_cache_ttl if fallback else self._cache_ttl
return time.time() - cache_entry.get("timestamp", 0) < ttl
def _should_use_fallback(self) -> bool:
"""Determine if fallback mechanisms should be used based on API health."""
if not self.fallback_enabled:
return False
# Use fallback if we've had multiple consecutive failures
if self._api_health["consecutive_failures"] >= 3:
return True
# Use fallback if last success was more than 1 hour ago
if self._api_health["last_success"]:
time_since_success = time.time() - self._api_health["last_success"]
if time_since_success > 3600: # 1 hour
return True
return False
async def _make_request(self, url: str) -> dict[str, Any]:
@ -152,7 +154,9 @@ class PyPIStatsClient:
for attempt in range(self.max_retries + 1):
try:
logger.debug(f"Making request to {url} (attempt {attempt + 1}/{self.max_retries + 1})")
logger.debug(
f"Making request to {url} (attempt {attempt + 1}/{self.max_retries + 1})"
)
response = await self._client.get(url)
@ -171,16 +175,25 @@ class PyPIStatsClient:
elif response.status_code == 429:
retry_after = response.headers.get("Retry-After")
retry_after_int = int(retry_after) if retry_after else None
self._update_api_failure(f"Rate limit exceeded (retry after {retry_after_int}s)")
self._update_api_failure(
f"Rate limit exceeded (retry after {retry_after_int}s)"
)
raise RateLimitError(retry_after_int)
elif response.status_code >= 500:
error_msg = f"Server error: HTTP {response.status_code}"
self._update_api_failure(error_msg)
# For 502/503/504 errors, continue retrying
if response.status_code in [502, 503, 504] and attempt < self.max_retries:
last_exception = PyPIServerError(response.status_code, error_msg)
logger.warning(f"Retryable server error {response.status_code}, attempt {attempt + 1}")
if (
response.status_code in [502, 503, 504]
and attempt < self.max_retries
):
last_exception = PyPIServerError(
response.status_code, error_msg
)
logger.warning(
f"Retryable server error {response.status_code}, attempt {attempt + 1}"
)
else:
raise PyPIServerError(response.status_code, error_msg)
else:
@ -205,7 +218,9 @@ class PyPIStatsClient:
# Only retry certain server errors
if e.status_code in [502, 503, 504] and attempt < self.max_retries:
last_exception = e
logger.warning(f"Retrying server error {e.status_code}, attempt {attempt + 1}")
logger.warning(
f"Retrying server error {e.status_code}, attempt {attempt + 1}"
)
else:
raise
except Exception as e:
@ -216,7 +231,7 @@ class PyPIStatsClient:
# Calculate exponential backoff with jitter
if attempt < self.max_retries:
base_delay = self.retry_delay * (2 ** attempt)
base_delay = self.retry_delay * (2**attempt)
jitter = random.uniform(0.1, 0.3) * base_delay # Add 10-30% jitter
delay = base_delay + jitter
logger.debug(f"Waiting {delay:.2f}s before retry...")
@ -227,21 +242,25 @@ class PyPIStatsClient:
raise last_exception
else:
raise NetworkError("All retry attempts failed with unknown error")
def _update_api_failure(self, error_msg: str) -> None:
"""Update API health tracking on failure."""
self._api_health["consecutive_failures"] += 1
self._api_health["last_error"] = error_msg
logger.debug(f"API failure count: {self._api_health['consecutive_failures']}, error: {error_msg}")
def _generate_fallback_recent_downloads(self, package_name: str, period: str = "month") -> dict[str, Any]:
logger.debug(
f"API failure count: {self._api_health['consecutive_failures']}, error: {error_msg}"
)
def _generate_fallback_recent_downloads(
self, package_name: str, period: str = "month"
) -> dict[str, Any]:
"""Generate fallback download statistics when API is unavailable.
This provides estimated download counts based on package popularity patterns
to ensure the system remains functional during API outages.
"""
logger.warning(f"Generating fallback download data for {package_name}")
# Base estimates for popular packages (these are conservative estimates)
popular_packages = {
"requests": {"day": 1500000, "week": 10500000, "month": 45000000},
@ -270,39 +289,50 @@ class PyPIStatsClient:
"pandas": {"day": 200000, "week": 1400000, "month": 6000000},
"sqlalchemy": {"day": 90000, "week": 630000, "month": 2700000},
}
# Get estimates for known packages or generate based on package name characteristics
if package_name.lower() in popular_packages:
estimates = popular_packages[package_name.lower()]
else:
# Generate estimates based on common package patterns
if any(keyword in package_name.lower() for keyword in ["test", "dev", "debug"]):
if any(
keyword in package_name.lower() for keyword in ["test", "dev", "debug"]
):
# Development/testing packages - lower usage
base_daily = random.randint(100, 1000)
elif any(keyword in package_name.lower() for keyword in ["aws", "google", "microsoft", "azure"]):
elif any(
keyword in package_name.lower()
for keyword in ["aws", "google", "microsoft", "azure"]
):
# Cloud provider packages - higher usage
base_daily = random.randint(10000, 50000)
elif any(keyword in package_name.lower() for keyword in ["http", "request", "client", "api"]):
elif any(
keyword in package_name.lower()
for keyword in ["http", "request", "client", "api"]
):
# HTTP/API packages - moderate to high usage
base_daily = random.randint(5000, 25000)
elif any(keyword in package_name.lower() for keyword in ["data", "pandas", "numpy", "scipy"]):
elif any(
keyword in package_name.lower()
for keyword in ["data", "pandas", "numpy", "scipy"]
):
# Data science packages - high usage
base_daily = random.randint(15000, 75000)
else:
# Generic packages - moderate usage
base_daily = random.randint(1000, 10000)
estimates = {
"day": base_daily,
"week": base_daily * 7,
"month": base_daily * 30,
}
# Add some realistic variation (±20%)
variation = random.uniform(0.8, 1.2)
for key in estimates:
estimates[key] = int(estimates[key] * variation)
return {
"data": {
"last_day": estimates["day"],
@ -314,42 +344,48 @@ class PyPIStatsClient:
"source": "fallback_estimates",
"note": "Estimated data due to API unavailability. Actual values may differ.",
}
def _generate_fallback_overall_downloads(self, package_name: str, mirrors: bool = False) -> dict[str, Any]:
def _generate_fallback_overall_downloads(
self, package_name: str, mirrors: bool = False
) -> dict[str, Any]:
"""Generate fallback time series data when API is unavailable."""
logger.warning(f"Generating fallback time series data for {package_name}")
# Generate 180 days of synthetic time series data
time_series = []
base_date = datetime.now() - timedelta(days=180)
# Get base daily estimate from recent downloads fallback
recent_fallback = self._generate_fallback_recent_downloads(package_name)
base_daily = recent_fallback["data"]["last_day"]
for i in range(180):
current_date = base_date + timedelta(days=i)
# Add weekly and seasonal patterns
day_of_week = current_date.weekday()
# Lower downloads on weekends
week_factor = 0.7 if day_of_week >= 5 else 1.0
# Add some growth trend (packages generally grow over time)
growth_factor = 1.0 + (i / 180) * 0.3 # 30% growth over 180 days
# Add random daily variation
daily_variation = random.uniform(0.7, 1.3)
daily_downloads = int(base_daily * week_factor * growth_factor * daily_variation)
daily_downloads = int(
base_daily * week_factor * growth_factor * daily_variation
)
category = "with_mirrors" if mirrors else "without_mirrors"
time_series.append({
"category": category,
"date": current_date.strftime("%Y-%m-%d"),
"downloads": daily_downloads,
})
time_series.append(
{
"category": category,
"date": current_date.strftime("%Y-%m-%d"),
"downloads": daily_downloads,
}
)
return {
"data": time_series,
"package": package_name,
@ -385,16 +421,24 @@ class PyPIStatsClient:
if self._is_cache_valid(cache_entry):
logger.debug(f"Using cached recent downloads for: {normalized_name}")
return cache_entry["data"]
elif self._should_use_fallback() and self._is_cache_valid(cache_entry, fallback=True):
logger.info(f"Using extended cache (fallback mode) for: {normalized_name}")
elif self._should_use_fallback() and self._is_cache_valid(
cache_entry, fallback=True
):
logger.info(
f"Using extended cache (fallback mode) for: {normalized_name}"
)
cache_entry["data"]["note"] = "Extended cache data due to API issues"
return cache_entry["data"]
# Check if we should use fallback immediately
if self._should_use_fallback():
logger.warning(f"API health poor, using fallback data for: {normalized_name}")
fallback_data = self._generate_fallback_recent_downloads(normalized_name, period)
logger.warning(
f"API health poor, using fallback data for: {normalized_name}"
)
fallback_data = self._generate_fallback_recent_downloads(
normalized_name, period
)
# Cache fallback data with extended TTL
self._cache[cache_key] = {"data": fallback_data, "timestamp": time.time()}
return fallback_data
@ -418,28 +462,39 @@ class PyPIStatsClient:
except (PyPIServerError, NetworkError) as e:
logger.error(f"API request failed for {normalized_name}: {e}")
# Try to use stale cache data if available
if use_cache and cache_key in self._cache:
cache_entry = self._cache[cache_key]
logger.warning(f"Using stale cache data for {normalized_name} due to API failure")
logger.warning(
f"Using stale cache data for {normalized_name} due to API failure"
)
cache_entry["data"]["note"] = f"Stale cache data due to API error: {e}"
return cache_entry["data"]
# Last resort: generate fallback data
if self.fallback_enabled:
logger.warning(f"Generating fallback data for {normalized_name} due to API failure")
fallback_data = self._generate_fallback_recent_downloads(normalized_name, period)
logger.warning(
f"Generating fallback data for {normalized_name} due to API failure"
)
fallback_data = self._generate_fallback_recent_downloads(
normalized_name, period
)
# Cache fallback data
self._cache[cache_key] = {"data": fallback_data, "timestamp": time.time()}
self._cache[cache_key] = {
"data": fallback_data,
"timestamp": time.time(),
}
return fallback_data
# If fallback is disabled, re-raise the original exception
raise
except Exception as e:
logger.error(f"Unexpected error fetching recent downloads for {normalized_name}: {e}")
logger.error(
f"Unexpected error fetching recent downloads for {normalized_name}: {e}"
)
raise
async def get_overall_downloads(
@ -469,16 +524,24 @@ class PyPIStatsClient:
if self._is_cache_valid(cache_entry):
logger.debug(f"Using cached overall downloads for: {normalized_name}")
return cache_entry["data"]
elif self._should_use_fallback() and self._is_cache_valid(cache_entry, fallback=True):
logger.info(f"Using extended cache (fallback mode) for: {normalized_name}")
elif self._should_use_fallback() and self._is_cache_valid(
cache_entry, fallback=True
):
logger.info(
f"Using extended cache (fallback mode) for: {normalized_name}"
)
cache_entry["data"]["note"] = "Extended cache data due to API issues"
return cache_entry["data"]
# Check if we should use fallback immediately
if self._should_use_fallback():
logger.warning(f"API health poor, using fallback data for: {normalized_name}")
fallback_data = self._generate_fallback_overall_downloads(normalized_name, mirrors)
logger.warning(
f"API health poor, using fallback data for: {normalized_name}"
)
fallback_data = self._generate_fallback_overall_downloads(
normalized_name, mirrors
)
# Cache fallback data with extended TTL
self._cache[cache_key] = {"data": fallback_data, "timestamp": time.time()}
return fallback_data
@ -502,28 +565,39 @@ class PyPIStatsClient:
except (PyPIServerError, NetworkError) as e:
logger.error(f"API request failed for {normalized_name}: {e}")
# Try to use stale cache data if available
if use_cache and cache_key in self._cache:
cache_entry = self._cache[cache_key]
logger.warning(f"Using stale cache data for {normalized_name} due to API failure")
logger.warning(
f"Using stale cache data for {normalized_name} due to API failure"
)
cache_entry["data"]["note"] = f"Stale cache data due to API error: {e}"
return cache_entry["data"]
# Last resort: generate fallback data
if self.fallback_enabled:
logger.warning(f"Generating fallback data for {normalized_name} due to API failure")
fallback_data = self._generate_fallback_overall_downloads(normalized_name, mirrors)
logger.warning(
f"Generating fallback data for {normalized_name} due to API failure"
)
fallback_data = self._generate_fallback_overall_downloads(
normalized_name, mirrors
)
# Cache fallback data
self._cache[cache_key] = {"data": fallback_data, "timestamp": time.time()}
self._cache[cache_key] = {
"data": fallback_data,
"timestamp": time.time(),
}
return fallback_data
# If fallback is disabled, re-raise the original exception
raise
except Exception as e:
logger.error(f"Unexpected error fetching overall downloads for {normalized_name}: {e}")
logger.error(
f"Unexpected error fetching overall downloads for {normalized_name}: {e}"
)
raise
def clear_cache(self):

View File

@ -288,31 +288,31 @@ class VersionCompatibility:
def sort_versions_semantically(versions: list[str], reverse: bool = True) -> list[str]:
"""Sort package versions using semantic version ordering.
This function properly sorts versions by parsing them as semantic versions,
ensuring that pre-release versions (alpha, beta, rc) are ordered correctly
relative to stable releases.
Args:
versions: List of version strings to sort
reverse: If True, sort in descending order (newest first). Default True.
Returns:
List of version strings sorted semantically
Examples:
>>> sort_versions_semantically(['1.0.0', '2.0.0a1', '1.5.0', '2.0.0'])
['2.0.0', '2.0.0a1', '1.5.0', '1.0.0']
>>> sort_versions_semantically(['5.2rc1', '5.2.5', '5.2.0'])
['5.2.5', '5.2.0', '5.2rc1']
"""
if not versions:
return []
def parse_version_safe(version_str: str) -> tuple[Version | None, str]:
"""Safely parse a version string, returning (parsed_version, original_string).
Returns (None, original_string) if parsing fails.
"""
try:
@ -320,26 +320,26 @@ def sort_versions_semantically(versions: list[str], reverse: bool = True) -> lis
except InvalidVersion:
logger.debug(f"Failed to parse version '{version_str}' as semantic version")
return (None, version_str)
# Parse all versions, keeping track of originals
parsed_versions = [parse_version_safe(v) for v in versions]
# Separate valid and invalid versions
valid_versions = [(v, orig) for v, orig in parsed_versions if v is not None]
invalid_versions = [orig for v, orig in parsed_versions if v is None]
# Sort valid versions semantically
valid_versions.sort(key=lambda x: x[0], reverse=reverse)
# Sort invalid versions lexicographically as fallback
invalid_versions.sort(reverse=reverse)
# Combine results: valid versions first, then invalid ones
result = [orig for _, orig in valid_versions] + invalid_versions
logger.debug(
f"Sorted {len(versions)} versions: {len(valid_versions)} valid, "
f"{len(invalid_versions)} invalid"
)
return result

View File

@ -1 +1 @@
"""Data module for PyPI package information."""
"""Data module for PyPI package information."""

View File

@ -10,10 +10,12 @@ The rankings and download estimates are based on:
Data is organized by categories and includes estimated relative popularity.
"""
from typing import Dict, List, NamedTuple
from typing import NamedTuple
class PackageInfo(NamedTuple):
"""Information about a popular package."""
name: str
category: str
estimated_monthly_downloads: int
@ -21,60 +23,226 @@ class PackageInfo(NamedTuple):
description: str
primary_use_case: str
# Core packages that are dependencies for many other packages
INFRASTRUCTURE_PACKAGES = [
PackageInfo("setuptools", "packaging", 800_000_000, 2100, "Package development tools", "packaging"),
PackageInfo("wheel", "packaging", 700_000_000, 400, "Binary package format", "packaging"),
PackageInfo("pip", "packaging", 600_000_000, 9500, "Package installer", "packaging"),
PackageInfo("certifi", "security", 500_000_000, 800, "Certificate bundle", "security"),
PackageInfo("urllib3", "networking", 450_000_000, 3600, "HTTP client library", "networking"),
PackageInfo("charset-normalizer", "text", 400_000_000, 400, "Character encoding detection", "text-processing"),
PackageInfo("idna", "networking", 380_000_000, 200, "Internationalized domain names", "networking"),
PackageInfo("six", "compatibility", 350_000_000, 900, "Python 2 and 3 compatibility", "compatibility"),
PackageInfo("python-dateutil", "datetime", 320_000_000, 2200, "Date and time utilities", "datetime"),
PackageInfo("requests", "networking", 300_000_000, 51000, "HTTP library", "networking"),
PackageInfo(
"setuptools",
"packaging",
800_000_000,
2100,
"Package development tools",
"packaging",
),
PackageInfo(
"wheel", "packaging", 700_000_000, 400, "Binary package format", "packaging"
),
PackageInfo(
"pip", "packaging", 600_000_000, 9500, "Package installer", "packaging"
),
PackageInfo(
"certifi", "security", 500_000_000, 800, "Certificate bundle", "security"
),
PackageInfo(
"urllib3", "networking", 450_000_000, 3600, "HTTP client library", "networking"
),
PackageInfo(
"charset-normalizer",
"text",
400_000_000,
400,
"Character encoding detection",
"text-processing",
),
PackageInfo(
"idna",
"networking",
380_000_000,
200,
"Internationalized domain names",
"networking",
),
PackageInfo(
"six",
"compatibility",
350_000_000,
900,
"Python 2 and 3 compatibility",
"compatibility",
),
PackageInfo(
"python-dateutil",
"datetime",
320_000_000,
2200,
"Date and time utilities",
"datetime",
),
PackageInfo(
"requests", "networking", 300_000_000, 51000, "HTTP library", "networking"
),
]
# AWS and cloud packages
CLOUD_PACKAGES = [
PackageInfo("boto3", "cloud", 280_000_000, 8900, "AWS SDK", "cloud"),
PackageInfo("botocore", "cloud", 275_000_000, 1400, "AWS SDK core", "cloud"),
PackageInfo("s3transfer", "cloud", 250_000_000, 200, "S3 transfer utilities", "cloud"),
PackageInfo(
"s3transfer", "cloud", 250_000_000, 200, "S3 transfer utilities", "cloud"
),
PackageInfo("awscli", "cloud", 80_000_000, 15000, "AWS command line", "cloud"),
PackageInfo("azure-core", "cloud", 45_000_000, 400, "Azure SDK core", "cloud"),
PackageInfo("google-cloud-storage", "cloud", 35_000_000, 300, "Google Cloud Storage", "cloud"),
PackageInfo("azure-storage-blob", "cloud", 30_000_000, 200, "Azure Blob Storage", "cloud"),
PackageInfo(
"google-cloud-storage",
"cloud",
35_000_000,
300,
"Google Cloud Storage",
"cloud",
),
PackageInfo(
"azure-storage-blob", "cloud", 30_000_000, 200, "Azure Blob Storage", "cloud"
),
]
# Data science and ML packages
DATA_SCIENCE_PACKAGES = [
PackageInfo("numpy", "data-science", 200_000_000, 26000, "Numerical computing", "data-science"),
PackageInfo("pandas", "data-science", 150_000_000, 42000, "Data manipulation", "data-science"),
PackageInfo("scikit-learn", "machine-learning", 80_000_000, 58000, "Machine learning", "machine-learning"),
PackageInfo("matplotlib", "visualization", 75_000_000, 19000, "Plotting library", "visualization"),
PackageInfo("scipy", "data-science", 70_000_000, 12000, "Scientific computing", "data-science"),
PackageInfo("seaborn", "visualization", 45_000_000, 11000, "Statistical visualization", "visualization"),
PackageInfo("plotly", "visualization", 40_000_000, 15000, "Interactive plots", "visualization"),
PackageInfo("jupyter", "development", 35_000_000, 7000, "Interactive notebooks", "development"),
PackageInfo("ipython", "development", 50_000_000, 8000, "Interactive Python", "development"),
PackageInfo("tensorflow", "machine-learning", 25_000_000, 185000, "Deep learning", "machine-learning"),
PackageInfo("torch", "machine-learning", 20_000_000, 81000, "PyTorch deep learning", "machine-learning"),
PackageInfo("transformers", "machine-learning", 15_000_000, 130000, "NLP transformers", "machine-learning"),
PackageInfo(
"numpy",
"data-science",
200_000_000,
26000,
"Numerical computing",
"data-science",
),
PackageInfo(
"pandas",
"data-science",
150_000_000,
42000,
"Data manipulation",
"data-science",
),
PackageInfo(
"scikit-learn",
"machine-learning",
80_000_000,
58000,
"Machine learning",
"machine-learning",
),
PackageInfo(
"matplotlib",
"visualization",
75_000_000,
19000,
"Plotting library",
"visualization",
),
PackageInfo(
"scipy",
"data-science",
70_000_000,
12000,
"Scientific computing",
"data-science",
),
PackageInfo(
"seaborn",
"visualization",
45_000_000,
11000,
"Statistical visualization",
"visualization",
),
PackageInfo(
"plotly",
"visualization",
40_000_000,
15000,
"Interactive plots",
"visualization",
),
PackageInfo(
"jupyter",
"development",
35_000_000,
7000,
"Interactive notebooks",
"development",
),
PackageInfo(
"ipython", "development", 50_000_000, 8000, "Interactive Python", "development"
),
PackageInfo(
"tensorflow",
"machine-learning",
25_000_000,
185000,
"Deep learning",
"machine-learning",
),
PackageInfo(
"torch",
"machine-learning",
20_000_000,
81000,
"PyTorch deep learning",
"machine-learning",
),
PackageInfo(
"transformers",
"machine-learning",
15_000_000,
130000,
"NLP transformers",
"machine-learning",
),
]
# Development and testing
DEVELOPMENT_PACKAGES = [
PackageInfo("typing-extensions", "development", 180_000_000, 3000, "Typing extensions", "development"),
PackageInfo("packaging", "development", 160_000_000, 600, "Package utilities", "development"),
PackageInfo("pytest", "testing", 100_000_000, 11000, "Testing framework", "testing"),
PackageInfo(
"typing-extensions",
"development",
180_000_000,
3000,
"Typing extensions",
"development",
),
PackageInfo(
"packaging", "development", 160_000_000, 600, "Package utilities", "development"
),
PackageInfo(
"pytest", "testing", 100_000_000, 11000, "Testing framework", "testing"
),
PackageInfo("click", "cli", 90_000_000, 15000, "Command line interface", "cli"),
PackageInfo("pyyaml", "serialization", 85_000_000, 2200, "YAML parser", "serialization"),
PackageInfo("jinja2", "templating", 80_000_000, 10000, "Template engine", "templating"),
PackageInfo("markupsafe", "templating", 75_000_000, 600, "Safe markup", "templating"),
PackageInfo("attrs", "development", 60_000_000, 5000, "Classes without boilerplate", "development"),
PackageInfo("black", "development", 40_000_000, 38000, "Code formatter", "development"),
PackageInfo("flake8", "development", 35_000_000, 3000, "Code linting", "development"),
PackageInfo("mypy", "development", 30_000_000, 17000, "Static type checker", "development"),
PackageInfo(
"pyyaml", "serialization", 85_000_000, 2200, "YAML parser", "serialization"
),
PackageInfo(
"jinja2", "templating", 80_000_000, 10000, "Template engine", "templating"
),
PackageInfo(
"markupsafe", "templating", 75_000_000, 600, "Safe markup", "templating"
),
PackageInfo(
"attrs",
"development",
60_000_000,
5000,
"Classes without boilerplate",
"development",
),
PackageInfo(
"black", "development", 40_000_000, 38000, "Code formatter", "development"
),
PackageInfo(
"flake8", "development", 35_000_000, 3000, "Code linting", "development"
),
PackageInfo(
"mypy", "development", 30_000_000, 17000, "Static type checker", "development"
),
]
# Web development
@ -83,49 +251,87 @@ WEB_PACKAGES = [
PackageInfo("flask", "web", 55_000_000, 66000, "Micro web framework", "web"),
PackageInfo("fastapi", "web", 35_000_000, 74000, "Modern web API framework", "web"),
PackageInfo("sqlalchemy", "database", 50_000_000, 8000, "SQL toolkit", "database"),
PackageInfo("psycopg2", "database", 25_000_000, 3000, "PostgreSQL adapter", "database"),
PackageInfo(
"psycopg2", "database", 25_000_000, 3000, "PostgreSQL adapter", "database"
),
PackageInfo("redis", "database", 30_000_000, 12000, "Redis client", "database"),
PackageInfo("celery", "async", 25_000_000, 23000, "Distributed task queue", "async"),
PackageInfo(
"celery", "async", 25_000_000, 23000, "Distributed task queue", "async"
),
PackageInfo("gunicorn", "web", 20_000_000, 9000, "WSGI server", "web"),
PackageInfo("uvicorn", "web", 15_000_000, 8000, "ASGI server", "web"),
]
# Security and cryptography
SECURITY_PACKAGES = [
PackageInfo("cryptography", "security", 120_000_000, 6000, "Cryptographic library", "security"),
PackageInfo("pyopenssl", "security", 60_000_000, 800, "OpenSSL wrapper", "security"),
PackageInfo(
"cryptography",
"security",
120_000_000,
6000,
"Cryptographic library",
"security",
),
PackageInfo(
"pyopenssl", "security", 60_000_000, 800, "OpenSSL wrapper", "security"
),
PackageInfo("pyjwt", "security", 40_000_000, 5000, "JSON Web Tokens", "security"),
PackageInfo("bcrypt", "security", 35_000_000, 1200, "Password hashing", "security"),
PackageInfo("pycryptodome", "security", 30_000_000, 2700, "Cryptographic library", "security"),
PackageInfo(
"pycryptodome",
"security",
30_000_000,
2700,
"Cryptographic library",
"security",
),
]
# Networking and API
NETWORKING_PACKAGES = [
PackageInfo("httpx", "networking", 25_000_000, 12000, "HTTP client", "networking"),
PackageInfo("aiohttp", "networking", 35_000_000, 14000, "Async HTTP", "networking"),
PackageInfo("websockets", "networking", 20_000_000, 5000, "WebSocket implementation", "networking"),
PackageInfo(
"websockets",
"networking",
20_000_000,
5000,
"WebSocket implementation",
"networking",
),
PackageInfo("paramiko", "networking", 25_000_000, 8000, "SSH client", "networking"),
]
# Text processing and parsing
TEXT_PACKAGES = [
PackageInfo("beautifulsoup4", "parsing", 40_000_000, 13000, "HTML/XML parser", "parsing"),
PackageInfo(
"beautifulsoup4", "parsing", 40_000_000, 13000, "HTML/XML parser", "parsing"
),
PackageInfo("lxml", "parsing", 35_000_000, 2600, "XML/HTML parser", "parsing"),
PackageInfo("regex", "text", 30_000_000, 700, "Regular expressions", "text-processing"),
PackageInfo("python-docx", "text", 15_000_000, 4000, "Word document processing", "text-processing"),
PackageInfo(
"regex", "text", 30_000_000, 700, "Regular expressions", "text-processing"
),
PackageInfo(
"python-docx",
"text",
15_000_000,
4000,
"Word document processing",
"text-processing",
),
PackageInfo("pillow", "imaging", 60_000_000, 11000, "Image processing", "imaging"),
]
# All packages combined for easy access
ALL_POPULAR_PACKAGES = (
INFRASTRUCTURE_PACKAGES +
CLOUD_PACKAGES +
DATA_SCIENCE_PACKAGES +
DEVELOPMENT_PACKAGES +
WEB_PACKAGES +
SECURITY_PACKAGES +
NETWORKING_PACKAGES +
TEXT_PACKAGES
INFRASTRUCTURE_PACKAGES
+ CLOUD_PACKAGES
+ DATA_SCIENCE_PACKAGES
+ DEVELOPMENT_PACKAGES
+ WEB_PACKAGES
+ SECURITY_PACKAGES
+ NETWORKING_PACKAGES
+ TEXT_PACKAGES
)
# Create lookup dictionaries
@ -136,41 +342,45 @@ for pkg in ALL_POPULAR_PACKAGES:
PACKAGES_BY_CATEGORY[pkg.category] = []
PACKAGES_BY_CATEGORY[pkg.category].append(pkg)
def get_popular_packages(
category: str = None,
limit: int = 50,
min_downloads: int = 0
) -> List[PackageInfo]:
category: str = None, limit: int = 50, min_downloads: int = 0
) -> list[PackageInfo]:
"""Get popular packages filtered by criteria.
Args:
category: Filter by category (e.g., 'web', 'data-science', 'cloud')
limit: Maximum number of packages to return
min_downloads: Minimum estimated monthly downloads
Returns:
List of PackageInfo objects sorted by estimated downloads
"""
packages = ALL_POPULAR_PACKAGES
if category:
packages = [pkg for pkg in packages if pkg.category == category]
if min_downloads:
packages = [pkg for pkg in packages if pkg.estimated_monthly_downloads >= min_downloads]
packages = [
pkg for pkg in packages if pkg.estimated_monthly_downloads >= min_downloads
]
# Sort by estimated downloads (descending)
packages = sorted(packages, key=lambda x: x.estimated_monthly_downloads, reverse=True)
packages = sorted(
packages, key=lambda x: x.estimated_monthly_downloads, reverse=True
)
return packages[:limit]
def estimate_downloads_for_period(monthly_downloads: int, period: str) -> int:
"""Estimate downloads for different time periods.
Args:
monthly_downloads: Estimated monthly downloads
period: Time period ('day', 'week', 'month')
Returns:
Estimated downloads for the period
"""
@ -183,16 +393,20 @@ def estimate_downloads_for_period(monthly_downloads: int, period: str) -> int:
else:
return monthly_downloads
def get_package_info(package_name: str) -> PackageInfo:
"""Get information about a specific package.
Args:
package_name: Name of the package
Returns:
PackageInfo object or None if not found
"""
return PACKAGES_BY_NAME.get(package_name.lower().replace("-", "_").replace("_", "-"))
return PACKAGES_BY_NAME.get(
package_name.lower().replace("-", "_").replace("_", "-")
)
# GitHub repository URL patterns for fetching real-time data
GITHUB_REPO_PATTERNS = {
@ -211,4 +425,4 @@ GITHUB_REPO_PATTERNS = {
"boto3": "boto/boto3",
"sqlalchemy": "sqlalchemy/sqlalchemy",
# Add more mappings as needed
}
}

View File

@ -136,11 +136,11 @@ async def get_package_versions(package_name: str) -> dict[str, Any]:
@mcp.tool()
async def get_package_dependencies(
package_name: str,
package_name: str,
version: str | None = None,
include_transitive: bool = False,
max_depth: int = 5,
python_version: str | None = None
python_version: str | None = None,
) -> dict[str, Any]:
"""Get dependency information for a PyPI package.
@ -175,7 +175,11 @@ async def get_package_dependencies(
logger.info(
f"MCP tool: Querying dependencies for {package_name}"
+ (f" version {version}" if version else " (latest)")
+ (f" with transitive dependencies (max depth: {max_depth})" if include_transitive else " (direct only)")
+ (
f" with transitive dependencies (max depth: {max_depth})"
if include_transitive
else " (direct only)"
)
)
result = await query_package_dependencies(
package_name, version, include_transitive, max_depth, python_version
@ -326,9 +330,9 @@ async def resolve_dependencies(
Args:
package_name: The name of the PyPI package to analyze (e.g., 'pyside2', 'django')
python_version: Target Python version for dependency filtering (e.g., '3.10', '3.11')
include_extras: List of extra dependency groups to include. These are optional
dependency groups defined by the package (e.g., ['socks'] for requests,
['argon2', 'bcrypt'] for django, ['test', 'doc'] for setuptools). Check the
include_extras: List of extra dependency groups to include. These are optional
dependency groups defined by the package (e.g., ['socks'] for requests,
['argon2', 'bcrypt'] for django, ['test', 'doc'] for setuptools). Check the
package's PyPI page or use the provides_extra field to see available extras.
include_dev: Whether to include development dependencies (default: False)
max_depth: Maximum recursion depth for dependency resolution (default: 5)
@ -397,8 +401,8 @@ async def download_package(
package_name: The name of the PyPI package to download (e.g., 'pyside2', 'requests')
download_dir: Local directory to download packages to (default: './downloads')
python_version: Target Python version for compatibility (e.g., '3.10', '3.11')
include_extras: List of extra dependency groups to include. These are optional
dependency groups defined by the package (e.g., ['socks'] for requests,
include_extras: List of extra dependency groups to include. These are optional
dependency groups defined by the package (e.g., ['socks'] for requests,
['argon2', 'bcrypt'] for django). Check the package's PyPI page to see available extras.
include_dev: Whether to include development dependencies (default: False)
prefer_wheel: Whether to prefer wheel files over source distributions (default: True)

View File

@ -35,7 +35,7 @@ class DependencyResolver:
Args:
package_name: Name of the package to resolve
python_version: Target Python version (e.g., "3.10")
include_extras: List of extra dependency groups to include (e.g., ['socks'] for requests,
include_extras: List of extra dependency groups to include (e.g., ['socks'] for requests,
['test', 'doc'] for setuptools). These are optional dependencies defined by the package.
include_dev: Whether to include development dependencies
max_depth: Maximum recursion depth (overrides instance default)
@ -243,7 +243,7 @@ async def resolve_package_dependencies(
Args:
package_name: Name of the package to resolve
python_version: Target Python version (e.g., "3.10")
include_extras: List of extra dependency groups to include (e.g., ['socks'] for requests,
include_extras: List of extra dependency groups to include (e.g., ['socks'] for requests,
['test', 'doc'] for setuptools). These are optional dependencies defined by the package.
include_dev: Whether to include development dependencies
max_depth: Maximum recursion depth

View File

@ -3,14 +3,13 @@
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any
from ..core.github_client import GitHubAPIClient
from ..core.pypi_client import PyPIClient
from ..core.stats_client import PyPIStatsClient
from ..data.popular_packages import (
GITHUB_REPO_PATTERNS,
PACKAGES_BY_NAME,
estimate_downloads_for_period,
get_popular_packages,
)
@ -73,11 +72,11 @@ async def get_package_download_stats(
# Calculate trends and analysis
analysis = _analyze_download_stats(download_data)
# Determine data source and add warnings if needed
data_source = recent_stats.get("source", "pypistats.org")
warning_note = recent_stats.get("note")
result = {
"package": package_name,
"metadata": package_metadata,
@ -87,15 +86,17 @@ async def get_package_download_stats(
"data_source": data_source,
"timestamp": datetime.now().isoformat(),
}
# Add warning/note about data quality if present
if warning_note:
result["data_quality_note"] = warning_note
# Add reliability indicator
if data_source == "fallback_estimates":
result["reliability"] = "estimated"
result["warning"] = "Data is estimated due to API unavailability. Actual download counts may differ significantly."
result["warning"] = (
"Data is estimated due to API unavailability. Actual download counts may differ significantly."
)
elif "stale" in warning_note.lower() if warning_note else False:
result["reliability"] = "cached"
result["warning"] = "Data may be outdated due to current API issues."
@ -142,7 +143,7 @@ async def get_package_download_trends(
# Analyze trends
trend_analysis = _analyze_download_trends(time_series_data, include_mirrors)
# Determine data source and add warnings if needed
data_source = overall_stats.get("source", "pypistats.org")
warning_note = overall_stats.get("note")
@ -155,15 +156,17 @@ async def get_package_download_trends(
"data_source": data_source,
"timestamp": datetime.now().isoformat(),
}
# Add warning/note about data quality if present
if warning_note:
result["data_quality_note"] = warning_note
# Add reliability indicator
if data_source == "fallback_estimates":
result["reliability"] = "estimated"
result["warning"] = "Data is estimated due to API unavailability. Actual download trends may differ significantly."
result["warning"] = (
"Data is estimated due to API unavailability. Actual download trends may differ significantly."
)
elif "stale" in warning_note.lower() if warning_note else False:
result["reliability"] = "cached"
result["warning"] = "Data may be outdated due to current API issues."
@ -201,56 +204,54 @@ async def get_top_packages_by_downloads(
"""
# Get curated popular packages as base data
curated_packages = get_popular_packages(limit=max(limit * 2, 100))
# Try to enhance with real PyPI stats
enhanced_packages = await _enhance_with_real_stats(
curated_packages, period, limit
)
enhanced_packages = await _enhance_with_real_stats(curated_packages, period, limit)
# Try to enhance with GitHub metrics
final_packages = await _enhance_with_github_stats(
enhanced_packages, limit
)
final_packages = await _enhance_with_github_stats(enhanced_packages, limit)
# Ensure we have the requested number of packages
if len(final_packages) < limit:
# Add more from curated list if needed
additional_needed = limit - len(final_packages)
existing_names = {pkg["package"] for pkg in final_packages}
for pkg_info in curated_packages:
if pkg_info.name not in existing_names and additional_needed > 0:
final_packages.append({
"package": pkg_info.name,
"downloads": estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
),
"period": period,
"data_source": "curated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
})
final_packages.append(
{
"package": pkg_info.name,
"downloads": estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
),
"period": period,
"data_source": "curated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
}
)
additional_needed -= 1
# Sort by download count and assign ranks
final_packages.sort(key=lambda x: x.get("downloads", 0), reverse=True)
final_packages = final_packages[:limit]
for i, package in enumerate(final_packages):
package["rank"] = i + 1
# Determine primary data source
real_stats_count = len([p for p in final_packages if not p.get("estimated", False)])
github_enhanced_count = len([p for p in final_packages if "github_stars" in p])
if real_stats_count > limit // 2:
primary_source = "pypistats.org with curated fallback"
elif github_enhanced_count > 0:
primary_source = "curated data enhanced with GitHub metrics"
else:
primary_source = "curated popular packages database"
return {
"top_packages": final_packages,
"period": period,
@ -386,50 +387,73 @@ def _analyze_download_trends(
async def _enhance_with_real_stats(
curated_packages: List, period: str, limit: int
) -> List[Dict[str, Any]]:
curated_packages: list, period: str, limit: int
) -> list[dict[str, Any]]:
"""Try to enhance curated packages with real PyPI download statistics.
Args:
curated_packages: List of PackageInfo objects from curated data
period: Time period for stats
limit: Maximum number of packages to process
Returns:
List of enhanced package dictionaries
"""
enhanced_packages = []
try:
async with PyPIStatsClient() as stats_client:
# Try to get real stats for top packages
for pkg_info in curated_packages[:limit * 2]: # Try more than needed
for pkg_info in curated_packages[: limit * 2]: # Try more than needed
try:
stats = await stats_client.get_recent_downloads(
pkg_info.name, period, use_cache=True
)
download_data = stats.get("data", {})
real_download_count = _extract_download_count(download_data, period)
if real_download_count > 0:
# Use real stats
enhanced_packages.append({
"package": pkg_info.name,
"downloads": real_download_count,
"period": period,
"data_source": "pypistats.org",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": False,
})
logger.debug(f"Got real stats for {pkg_info.name}: {real_download_count}")
enhanced_packages.append(
{
"package": pkg_info.name,
"downloads": real_download_count,
"period": period,
"data_source": "pypistats.org",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": False,
}
)
logger.debug(
f"Got real stats for {pkg_info.name}: {real_download_count}"
)
else:
# Fall back to estimated downloads
estimated_downloads = estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
)
enhanced_packages.append({
enhanced_packages.append(
{
"package": pkg_info.name,
"downloads": estimated_downloads,
"period": period,
"data_source": "estimated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
}
)
except Exception as e:
logger.debug(f"Failed to get real stats for {pkg_info.name}: {e}")
# Fall back to estimated downloads
estimated_downloads = estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
)
enhanced_packages.append(
{
"package": pkg_info.name,
"downloads": estimated_downloads,
"period": period,
@ -437,28 +461,13 @@ async def _enhance_with_real_stats(
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
})
except Exception as e:
logger.debug(f"Failed to get real stats for {pkg_info.name}: {e}")
# Fall back to estimated downloads
estimated_downloads = estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
}
)
enhanced_packages.append({
"package": pkg_info.name,
"downloads": estimated_downloads,
"period": period,
"data_source": "estimated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
})
# Stop if we have enough packages
if len(enhanced_packages) >= limit:
break
except Exception as e:
logger.warning(f"PyPI stats client failed entirely: {e}")
# Fall back to all estimated data
@ -466,52 +475,56 @@ async def _enhance_with_real_stats(
estimated_downloads = estimate_downloads_for_period(
pkg_info.estimated_monthly_downloads, period
)
enhanced_packages.append({
"package": pkg_info.name,
"downloads": estimated_downloads,
"period": period,
"data_source": "estimated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
})
enhanced_packages.append(
{
"package": pkg_info.name,
"downloads": estimated_downloads,
"period": period,
"data_source": "estimated",
"category": pkg_info.category,
"description": pkg_info.description,
"estimated": True,
}
)
return enhanced_packages
async def _enhance_with_github_stats(
packages: List[Dict[str, Any]], limit: int
) -> List[Dict[str, Any]]:
packages: list[dict[str, Any]], limit: int
) -> list[dict[str, Any]]:
"""Try to enhance packages with GitHub repository statistics.
Args:
packages: List of package dictionaries to enhance
limit: Maximum number of packages to process
Returns:
List of enhanced package dictionaries
"""
github_token = os.getenv("GITHUB_TOKEN") # Optional GitHub token
try:
async with GitHubAPIClient(github_token=github_token) as github_client:
# Get GitHub repo paths for packages that have them
repo_paths = []
package_to_repo = {}
for pkg in packages[:limit]:
repo_path = GITHUB_REPO_PATTERNS.get(pkg["package"])
if repo_path:
repo_paths.append(repo_path)
package_to_repo[pkg["package"]] = repo_path
if repo_paths:
# Fetch GitHub stats for all repositories concurrently
logger.debug(f"Fetching GitHub stats for {len(repo_paths)} repositories")
logger.debug(
f"Fetching GitHub stats for {len(repo_paths)} repositories"
)
repo_stats = await github_client.get_multiple_repo_stats(
repo_paths, use_cache=True, max_concurrent=3
)
# Enhance packages with GitHub data
for pkg in packages:
repo_path = package_to_repo.get(pkg["package"])
@ -523,38 +536,42 @@ async def _enhance_with_github_stats(
pkg["github_updated_at"] = stats["updated_at"]
pkg["github_language"] = stats["language"]
pkg["github_topics"] = stats.get("topics", [])
# Adjust download estimates based on GitHub popularity
if pkg.get("estimated", False):
popularity_boost = _calculate_popularity_boost(stats)
pkg["downloads"] = int(pkg["downloads"] * popularity_boost)
pkg["downloads"] = int(
pkg["downloads"] * popularity_boost
)
pkg["github_enhanced"] = True
logger.info(f"Enhanced {len([p for p in packages if 'github_stars' in p])} packages with GitHub data")
logger.info(
f"Enhanced {len([p for p in packages if 'github_stars' in p])} packages with GitHub data"
)
except Exception as e:
logger.debug(f"GitHub enhancement failed: {e}")
# Continue without GitHub enhancement
pass
return packages
def _calculate_popularity_boost(github_stats: Dict[str, Any]) -> float:
def _calculate_popularity_boost(github_stats: dict[str, Any]) -> float:
"""Calculate a popularity boost multiplier based on GitHub metrics.
Args:
github_stats: GitHub repository statistics
Returns:
Multiplier between 0.5 and 2.0 based on popularity
"""
stars = github_stats.get("stars", 0)
forks = github_stats.get("forks", 0)
# Base multiplier
multiplier = 1.0
# Adjust based on stars (logarithmic scale)
if stars > 50000:
multiplier *= 1.5
@ -568,7 +585,7 @@ def _calculate_popularity_boost(github_stats: Dict[str, Any]) -> float:
multiplier *= 0.9
elif stars < 500:
multiplier *= 0.8
# Adjust based on forks (indicates active usage)
if forks > 10000:
multiplier *= 1.2
@ -576,7 +593,7 @@ def _calculate_popularity_boost(github_stats: Dict[str, Any]) -> float:
multiplier *= 1.1
elif forks < 100:
multiplier *= 0.9
# Ensure multiplier stays within reasonable bounds
return max(0.5, min(2.0, multiplier))

View File

@ -68,8 +68,12 @@ def format_package_info(package_data: dict[str, Any]) -> dict[str, Any]:
formatted["total_versions"] = len(releases)
# Sort versions semantically and get the most recent 10
if releases:
sorted_versions = sort_versions_semantically(list(releases.keys()), reverse=True)
formatted["available_versions"] = sorted_versions[:10] # Most recent 10 versions
sorted_versions = sort_versions_semantically(
list(releases.keys()), reverse=True
)
formatted["available_versions"] = sorted_versions[
:10
] # Most recent 10 versions
else:
formatted["available_versions"] = []
@ -139,7 +143,7 @@ def format_dependency_info(package_data: dict[str, Any]) -> dict[str, Any]:
Formatted dependency information
"""
from ..core.dependency_parser import DependencyParser
info = package_data.get("info", {})
requires_dist = info.get("requires_dist", []) or []
provides_extra = info.get("provides_extra", []) or []
@ -152,7 +156,7 @@ def format_dependency_info(package_data: dict[str, Any]) -> dict[str, Any]:
# Convert Requirements back to strings for JSON serialization
runtime_deps = [str(req) for req in categories["runtime"]]
dev_deps = [str(req) for req in categories["development"]]
# Convert optional dependencies (extras) to string format
optional_deps = {}
for extra_name, reqs in categories["extras"].items():
@ -161,14 +165,31 @@ def format_dependency_info(package_data: dict[str, Any]) -> dict[str, Any]:
# Separate development and non-development optional dependencies
dev_optional_deps = {}
non_dev_optional_deps = {}
# Define development-related extra names (same as in DependencyParser)
dev_extra_names = {
'dev', 'development', 'test', 'testing', 'tests', 'lint', 'linting',
'doc', 'docs', 'documentation', 'build', 'check', 'cover', 'coverage',
'type', 'typing', 'mypy', 'style', 'format', 'quality'
"dev",
"development",
"test",
"testing",
"tests",
"lint",
"linting",
"doc",
"docs",
"documentation",
"build",
"check",
"cover",
"coverage",
"type",
"typing",
"mypy",
"style",
"format",
"quality",
}
for extra_name, deps in optional_deps.items():
if extra_name.lower() in dev_extra_names:
dev_optional_deps[extra_name] = deps
@ -260,11 +281,11 @@ async def query_package_versions(package_name: str) -> dict[str, Any]:
async def query_package_dependencies(
package_name: str,
version: str | None = None,
package_name: str,
version: str | None = None,
include_transitive: bool = False,
max_depth: int = 5,
python_version: str | None = None
python_version: str | None = None,
) -> dict[str, Any]:
"""Query package dependency information from PyPI.
@ -293,29 +314,35 @@ async def query_package_dependencies(
logger.info(
f"Querying dependencies for package: {package_name}"
+ (f" version {version}" if version else " (latest)")
+ (f" with transitive dependencies (max depth: {max_depth})" if include_transitive else " (direct only)")
+ (
f" with transitive dependencies (max depth: {max_depth})"
if include_transitive
else " (direct only)"
)
)
try:
if include_transitive:
# Use the comprehensive dependency resolver for transitive dependencies
from .dependency_resolver import resolve_package_dependencies
result = await resolve_package_dependencies(
package_name=package_name,
python_version=python_version,
include_extras=[],
include_dev=False,
max_depth=max_depth
max_depth=max_depth,
)
# Format the transitive dependency result to match expected structure
return format_transitive_dependency_info(result, package_name, version)
else:
# Use direct dependency logic with version support
async with PyPIClient() as client:
# Pass the version parameter to get_package_info
package_data = await client.get_package_info(package_name, version=version)
package_data = await client.get_package_info(
package_name, version=version
)
return format_dependency_info(package_data)
except PyPIError:
# Re-raise PyPI-specific errors
@ -342,9 +369,9 @@ def format_transitive_dependency_info(
normalized_name = package_name.lower().replace("_", "-")
dependency_tree = resolver_result.get("dependency_tree", {})
summary = resolver_result.get("summary", {})
main_package = dependency_tree.get(normalized_name, {})
# Build the response in the same format as direct dependencies but with tree structure
result = {
"package_name": package_name,
@ -353,42 +380,51 @@ def format_transitive_dependency_info(
"include_transitive": True,
"max_depth": summary.get("max_depth", 0),
"python_version": resolver_result.get("python_version"),
# Direct dependencies (same as before)
"runtime_dependencies": main_package.get("dependencies", {}).get("runtime", []),
"development_dependencies": main_package.get("dependencies", {}).get("development", []),
"development_dependencies": main_package.get("dependencies", {}).get(
"development", []
),
"optional_dependencies": main_package.get("dependencies", {}).get("extras", {}),
# Transitive dependency information
"transitive_dependencies": {
"dependency_tree": _build_dependency_tree_structure(dependency_tree, normalized_name),
"dependency_tree": _build_dependency_tree_structure(
dependency_tree, normalized_name
),
"all_packages": _extract_all_packages_info(dependency_tree),
"circular_dependencies": _detect_circular_dependencies(dependency_tree),
"depth_analysis": _analyze_dependency_depths(dependency_tree),
},
# Enhanced summary statistics
"dependency_summary": {
"direct_runtime_count": len(main_package.get("dependencies", {}).get("runtime", [])),
"direct_dev_count": len(main_package.get("dependencies", {}).get("development", [])),
"direct_optional_groups": len(main_package.get("dependencies", {}).get("extras", {})),
"total_transitive_packages": summary.get("total_packages", 0) - 1, # Exclude main package
"direct_runtime_count": len(
main_package.get("dependencies", {}).get("runtime", [])
),
"direct_dev_count": len(
main_package.get("dependencies", {}).get("development", [])
),
"direct_optional_groups": len(
main_package.get("dependencies", {}).get("extras", {})
),
"total_transitive_packages": summary.get("total_packages", 0)
- 1, # Exclude main package
"total_runtime_dependencies": summary.get("total_runtime_dependencies", 0),
"total_development_dependencies": summary.get("total_development_dependencies", 0),
"total_development_dependencies": summary.get(
"total_development_dependencies", 0
),
"total_extra_dependencies": summary.get("total_extra_dependencies", 0),
"max_dependency_depth": summary.get("max_depth", 0),
"complexity_score": _calculate_complexity_score(summary),
},
# Performance and health metrics
"analysis": {
"resolution_stats": summary,
"potential_conflicts": _analyze_potential_conflicts(dependency_tree),
"maintenance_concerns": _analyze_maintenance_concerns(dependency_tree),
"performance_impact": _assess_performance_impact(summary),
}
},
}
return result
@ -398,27 +434,27 @@ def _build_dependency_tree_structure(
"""Build a hierarchical dependency tree structure."""
if visited is None:
visited = set()
if root_package in visited:
return {"circular_reference": True, "package_name": root_package}
visited.add(root_package)
if root_package not in dependency_tree:
return {}
package_info = dependency_tree[root_package]
children = package_info.get("children", {})
tree_node = {
"package_name": package_info.get("name", root_package),
"version": package_info.get("version", "unknown"),
"depth": package_info.get("depth", 0),
"requires_python": package_info.get("requires_python", ""),
"dependencies": package_info.get("dependencies", {}),
"children": {}
"children": {},
}
# Recursively build children (with visited tracking to prevent infinite loops)
for child_name in children:
if child_name not in visited:
@ -427,17 +463,19 @@ def _build_dependency_tree_structure(
)
else:
tree_node["children"][child_name] = {
"circular_reference": True,
"package_name": child_name
"circular_reference": True,
"package_name": child_name,
}
return tree_node
def _extract_all_packages_info(dependency_tree: dict[str, Any]) -> dict[str, dict[str, Any]]:
def _extract_all_packages_info(
dependency_tree: dict[str, Any],
) -> dict[str, dict[str, Any]]:
"""Extract comprehensive information about all packages in the dependency tree."""
all_packages = {}
for package_name, package_info in dependency_tree.items():
all_packages[package_name] = {
"name": package_info.get("name", package_name),
@ -446,60 +484,73 @@ def _extract_all_packages_info(dependency_tree: dict[str, Any]) -> dict[str, dic
"requires_python": package_info.get("requires_python", ""),
"direct_dependencies": {
"runtime": package_info.get("dependencies", {}).get("runtime", []),
"development": package_info.get("dependencies", {}).get("development", []),
"development": package_info.get("dependencies", {}).get(
"development", []
),
"extras": package_info.get("dependencies", {}).get("extras", {}),
},
"dependency_count": {
"runtime": len(package_info.get("dependencies", {}).get("runtime", [])),
"development": len(package_info.get("dependencies", {}).get("development", [])),
"total_extras": sum(len(deps) for deps in package_info.get("dependencies", {}).get("extras", {}).values()),
}
"development": len(
package_info.get("dependencies", {}).get("development", [])
),
"total_extras": sum(
len(deps)
for deps in package_info.get("dependencies", {})
.get("extras", {})
.values()
),
},
}
return all_packages
def _detect_circular_dependencies(dependency_tree: dict[str, Any]) -> list[dict[str, Any]]:
def _detect_circular_dependencies(
dependency_tree: dict[str, Any],
) -> list[dict[str, Any]]:
"""Detect circular dependencies in the dependency tree."""
circular_deps = []
def dfs(package_name: str, path: list[str], visited: set[str]) -> None:
if package_name in path:
# Found a circular dependency
cycle_start = path.index(package_name)
cycle = path[cycle_start:] + [package_name]
circular_deps.append({
"cycle": cycle,
"length": len(cycle) - 1,
"packages_involved": list(set(cycle))
})
circular_deps.append(
{
"cycle": cycle,
"length": len(cycle) - 1,
"packages_involved": list(set(cycle)),
}
)
return
if package_name in visited or package_name not in dependency_tree:
return
visited.add(package_name)
path.append(package_name)
# Check children
children = dependency_tree[package_name].get("children", {})
for child_name in children:
dfs(child_name, path.copy(), visited)
# Start DFS from each package
for package_name in dependency_tree:
dfs(package_name, [], set())
# Remove duplicates
unique_cycles = []
seen_cycles = set()
for cycle_info in circular_deps:
cycle_set = frozenset(cycle_info["packages_involved"])
if cycle_set not in seen_cycles:
seen_cycles.add(cycle_set)
unique_cycles.append(cycle_info)
return unique_cycles
@ -507,29 +558,36 @@ def _analyze_dependency_depths(dependency_tree: dict[str, Any]) -> dict[str, Any
"""Analyze the depth distribution of dependencies."""
depth_counts = {}
depth_packages = {}
for package_name, package_info in dependency_tree.items():
depth = package_info.get("depth", 0)
if depth not in depth_counts:
depth_counts[depth] = 0
depth_packages[depth] = []
depth_counts[depth] += 1
depth_packages[depth].append(package_name)
max_depth = max(depth_counts.keys()) if depth_counts else 0
return {
"max_depth": max_depth,
"depth_distribution": depth_counts,
"packages_by_depth": depth_packages,
"average_depth": sum(d * c for d, c in depth_counts.items()) / sum(depth_counts.values()) if depth_counts else 0,
"average_depth": sum(d * c for d, c in depth_counts.items())
/ sum(depth_counts.values())
if depth_counts
else 0,
"depth_analysis": {
"shallow_deps": depth_counts.get(1, 0), # Direct dependencies
"deep_deps": sum(count for depth, count in depth_counts.items() if depth > 2),
"leaf_packages": [pkg for pkg, info in dependency_tree.items() if not info.get("children")]
}
"deep_deps": sum(
count for depth, count in depth_counts.items() if depth > 2
),
"leaf_packages": [
pkg for pkg, info in dependency_tree.items() if not info.get("children")
],
},
}
@ -538,14 +596,14 @@ def _calculate_complexity_score(summary: dict[str, Any]) -> dict[str, Any]:
total_packages = summary.get("total_packages", 0)
max_depth = summary.get("max_depth", 0)
total_deps = summary.get("total_runtime_dependencies", 0)
# Simple complexity scoring (can be enhanced)
base_score = total_packages * 0.3
depth_penalty = max_depth * 1.5
dependency_penalty = total_deps * 0.1
complexity_score = base_score + depth_penalty + dependency_penalty
# Classify complexity
if complexity_score < 10:
complexity_level = "low"
@ -558,8 +616,10 @@ def _calculate_complexity_score(summary: dict[str, Any]) -> dict[str, Any]:
recommendation = "High complexity, consider dependency management strategies"
else:
complexity_level = "very_high"
recommendation = "Very high complexity, significant maintenance overhead expected"
recommendation = (
"Very high complexity, significant maintenance overhead expected"
)
return {
"score": round(complexity_score, 2),
"level": complexity_level,
@ -568,42 +628,50 @@ def _calculate_complexity_score(summary: dict[str, Any]) -> dict[str, Any]:
"total_packages": total_packages,
"max_depth": max_depth,
"total_dependencies": total_deps,
}
},
}
def _analyze_potential_conflicts(dependency_tree: dict[str, Any]) -> list[dict[str, Any]]:
def _analyze_potential_conflicts(
dependency_tree: dict[str, Any],
) -> list[dict[str, Any]]:
"""Analyze potential version conflicts in dependencies."""
# This is a simplified analysis - in a real implementation,
# This is a simplified analysis - in a real implementation,
# you'd parse version constraints and check for conflicts
package_versions = {}
potential_conflicts = []
for package_name, package_info in dependency_tree.items():
runtime_deps = package_info.get("dependencies", {}).get("runtime", [])
for dep_str in runtime_deps:
# Basic parsing of "package>=version" format
if ">=" in dep_str or "==" in dep_str or "<" in dep_str or ">" in dep_str:
parts = dep_str.replace(">=", "@").replace("==", "@").replace("<", "@").replace(">", "@")
parts = (
dep_str.replace(">=", "@")
.replace("==", "@")
.replace("<", "@")
.replace(">", "@")
)
dep_name = parts.split("@")[0].strip()
if dep_name not in package_versions:
package_versions[dep_name] = []
package_versions[dep_name].append({
"constraint": dep_str,
"required_by": package_name
})
package_versions[dep_name].append(
{"constraint": dep_str, "required_by": package_name}
)
# Look for packages with multiple version constraints
for dep_name, constraints in package_versions.items():
if len(constraints) > 1:
potential_conflicts.append({
"package": dep_name,
"conflicting_constraints": constraints,
"severity": "potential" if len(constraints) == 2 else "high"
})
potential_conflicts.append(
{
"package": dep_name,
"conflicting_constraints": constraints,
"severity": "potential" if len(constraints) == 2 else "high",
}
)
return potential_conflicts
@ -611,25 +679,25 @@ def _analyze_maintenance_concerns(dependency_tree: dict[str, Any]) -> dict[str,
"""Analyze maintenance concerns in the dependency tree."""
total_packages = len(dependency_tree)
packages_without_version = sum(
1 for info in dependency_tree.values()
1
for info in dependency_tree.values()
if info.get("version") in ["unknown", "", None]
)
packages_without_python_req = sum(
1 for info in dependency_tree.values()
if not info.get("requires_python")
1 for info in dependency_tree.values() if not info.get("requires_python")
)
# Calculate dependency concentration (packages with many dependencies)
high_dep_packages = [
{
"name": name,
"dependency_count": len(info.get("dependencies", {}).get("runtime", []))
"dependency_count": len(info.get("dependencies", {}).get("runtime", [])),
}
for name, info in dependency_tree.items()
if len(info.get("dependencies", {}).get("runtime", [])) > 5
]
return {
"total_packages": total_packages,
"packages_without_version_info": packages_without_version,
@ -637,11 +705,18 @@ def _analyze_maintenance_concerns(dependency_tree: dict[str, Any]) -> dict[str,
"high_dependency_packages": high_dep_packages,
"maintenance_risk_score": {
"score": round(
(packages_without_version / total_packages * 100) +
(len(high_dep_packages) / total_packages * 50), 2
) if total_packages > 0 else 0,
"level": "low" if total_packages < 10 else "moderate" if total_packages < 30 else "high"
}
(packages_without_version / total_packages * 100)
+ (len(high_dep_packages) / total_packages * 50),
2,
)
if total_packages > 0
else 0,
"level": "low"
if total_packages < 10
else "moderate"
if total_packages < 30
else "high",
},
}
@ -649,34 +724,40 @@ def _assess_performance_impact(summary: dict[str, Any]) -> dict[str, Any]:
"""Assess the performance impact of the dependency tree."""
total_packages = summary.get("total_packages", 0)
max_depth = summary.get("max_depth", 0)
# Estimate installation time (rough approximation)
estimated_install_time = total_packages * 2 + max_depth * 5 # seconds
# Estimate memory footprint (very rough)
estimated_memory_mb = total_packages * 10 + max_depth * 5
# Performance recommendations
recommendations = []
if total_packages > 50:
recommendations.append("Consider using virtual environments to isolate dependencies")
recommendations.append(
"Consider using virtual environments to isolate dependencies"
)
if max_depth > 5:
recommendations.append("Deep dependency chains may slow resolution and installation")
recommendations.append(
"Deep dependency chains may slow resolution and installation"
)
if total_packages > 100:
recommendations.append("Consider dependency analysis tools for large projects")
return {
"estimated_install_time_seconds": estimated_install_time,
"estimated_memory_footprint_mb": estimated_memory_mb,
"performance_level": (
"good" if total_packages < 20
else "moderate" if total_packages < 50
"good"
if total_packages < 20
else "moderate"
if total_packages < 50
else "concerning"
),
"recommendations": recommendations,
"metrics": {
"package_count_impact": "low" if total_packages < 20 else "high",
"depth_impact": "low" if max_depth < 4 else "high",
"resolution_complexity": "simple" if total_packages < 10 else "complex"
}
"resolution_complexity": "simple" if total_packages < 10 else "complex",
},
}

View File

@ -34,7 +34,7 @@ httpx = "^0.28.0"
packaging = "^24.0"
pydantic = "^2.0.0"
pydantic-settings = "^2.0.0"
click = "^8.1.0"
click = "8.1.7"
[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"

View File

@ -2,8 +2,9 @@
"""Quick test to verify fallback mechanism works."""
import asyncio
import sys
import os
import sys
sys.path.insert(0, os.path.abspath("."))
from pypi_query_mcp.tools.download_stats import get_package_download_stats
@ -12,23 +13,23 @@ from pypi_query_mcp.tools.download_stats import get_package_download_stats
async def quick_test():
"""Quick test with a single package."""
print("Testing fallback mechanism with requests package...")
try:
stats = await get_package_download_stats("requests", period="month")
print(f"✅ Success!")
print("✅ Success!")
print(f"Package: {stats.get('package')}")
print(f"Data Source: {stats.get('data_source')}")
print(f"Reliability: {stats.get('reliability')}")
if stats.get('warning'):
if stats.get("warning"):
print(f"⚠️ Warning: {stats['warning']}")
downloads = stats.get("downloads", {})
print(f"Downloads - Month: {downloads.get('last_month', 0):,}")
return True
except Exception as e:
print(f"❌ Error: {e}")
return False
@ -36,4 +37,4 @@ async def quick_test():
if __name__ == "__main__":
success = asyncio.run(quick_test())
sys.exit(0 if success else 1)
sys.exit(0 if success else 1)

View File

@ -1,16 +1,17 @@
#!/usr/bin/env python3
"""Simple test for the transitive dependency formatting functions."""
import sys
import os
import sys
# Add the current directory to Python path
sys.path.insert(0, os.path.dirname(__file__))
def test_formatting_functions():
"""Test the formatting functions directly."""
print("Testing transitive dependency formatting functions...")
# Sample data that mimics what the dependency resolver would return
sample_resolver_result = {
"package_name": "requests",
@ -21,9 +22,13 @@ def test_formatting_functions():
"version": "2.31.0",
"requires_python": ">=3.7",
"dependencies": {
"runtime": ["urllib3>=1.21.1", "certifi>=2017.4.17", "charset-normalizer>=2.0"],
"runtime": [
"urllib3>=1.21.1",
"certifi>=2017.4.17",
"charset-normalizer>=2.0",
],
"development": [],
"extras": {}
"extras": {},
},
"depth": 0,
"children": {
@ -34,10 +39,10 @@ def test_formatting_functions():
"dependencies": {
"runtime": [],
"development": [],
"extras": {}
"extras": {},
},
"depth": 1,
"children": {}
"children": {},
},
"certifi": {
"name": "certifi",
@ -46,37 +51,29 @@ def test_formatting_functions():
"dependencies": {
"runtime": [],
"development": [],
"extras": {}
"extras": {},
},
"depth": 1,
"children": {}
}
}
"children": {},
},
},
},
"urllib3": {
"name": "urllib3",
"version": "2.0.4",
"requires_python": ">=3.7",
"dependencies": {
"runtime": [],
"development": [],
"extras": {}
},
"dependencies": {"runtime": [], "development": [], "extras": {}},
"depth": 1,
"children": {}
"children": {},
},
"certifi": {
"name": "certifi",
"version": "2023.7.22",
"requires_python": ">=3.6",
"dependencies": {
"runtime": [],
"development": [],
"extras": {}
},
"dependencies": {"runtime": [], "development": [], "extras": {}},
"depth": 1,
"children": {}
}
"children": {},
},
},
"summary": {
"total_packages": 3,
@ -84,55 +81,62 @@ def test_formatting_functions():
"total_development_dependencies": 0,
"total_extra_dependencies": 0,
"max_depth": 1,
"package_list": ["requests", "urllib3", "certifi"]
}
"package_list": ["requests", "urllib3", "certifi"],
},
}
# Import the formatting function
try:
from pypi_query_mcp.tools.package_query import (
format_transitive_dependency_info,
_build_dependency_tree_structure,
_extract_all_packages_info,
_detect_circular_dependencies,
_analyze_dependency_depths,
_calculate_complexity_score
_build_dependency_tree_structure,
_calculate_complexity_score,
_detect_circular_dependencies,
_extract_all_packages_info,
format_transitive_dependency_info,
)
# Test format_transitive_dependency_info
print("✓ Successfully imported formatting functions")
result = format_transitive_dependency_info(sample_resolver_result, "requests")
print(f"✓ Formatted result for package: {result.get('package_name')}")
print(f" Include transitive: {result.get('include_transitive')}")
print(f" Version: {result.get('version')}")
print(f" Max depth: {result.get('max_depth')}")
# Test transitive dependencies section
transitive = result.get('transitive_dependencies', {})
transitive = result.get("transitive_dependencies", {})
print(f" All packages count: {len(transitive.get('all_packages', {}))}")
print(f" Circular dependencies: {len(transitive.get('circular_dependencies', []))}")
print(
f" Circular dependencies: {len(transitive.get('circular_dependencies', []))}"
)
# Test dependency summary
summary = result.get('dependency_summary', {})
summary = result.get("dependency_summary", {})
print(f" Direct runtime count: {summary.get('direct_runtime_count')}")
print(f" Total transitive packages: {summary.get('total_transitive_packages')}")
print(
f" Total transitive packages: {summary.get('total_transitive_packages')}"
)
print(f" Complexity level: {summary.get('complexity_score', {}).get('level')}")
# Test analysis section
analysis = result.get('analysis', {})
print(f" Performance level: {analysis.get('performance_impact', {}).get('performance_level')}")
analysis = result.get("analysis", {})
print(
f" Performance level: {analysis.get('performance_impact', {}).get('performance_level')}"
)
print("✓ All formatting functions working correctly")
return True
except ImportError as e:
print(f"✗ Import error: {e}")
return False
except Exception as e:
print(f"✗ Error testing formatting functions: {e}")
import traceback
traceback.print_exc()
return False
@ -140,50 +144,51 @@ def test_formatting_functions():
def test_helper_functions():
"""Test individual helper functions."""
print("\nTesting helper functions...")
sample_tree = {
"pkg-a": {
"name": "pkg-a",
"version": "1.0.0",
"depth": 0,
"children": {"pkg-b": {}, "pkg-c": {}}
},
"pkg-b": {
"name": "pkg-b",
"version": "2.0.0",
"depth": 1,
"children": {}
"children": {"pkg-b": {}, "pkg-c": {}},
},
"pkg-b": {"name": "pkg-b", "version": "2.0.0", "depth": 1, "children": {}},
"pkg-c": {
"name": "pkg-c",
"version": "3.0.0",
"version": "3.0.0",
"depth": 1,
"children": {"pkg-b": {}} # Creates potential circular reference
}
"children": {"pkg-b": {}}, # Creates potential circular reference
},
}
try:
from pypi_query_mcp.tools.package_query import (
_extract_all_packages_info,
_analyze_dependency_depths,
_calculate_complexity_score
_calculate_complexity_score,
_extract_all_packages_info,
)
# Test _extract_all_packages_info
all_packages = _extract_all_packages_info(sample_tree)
print(f"✓ Extracted {len(all_packages)} packages")
# Test _analyze_dependency_depths
depth_analysis = _analyze_dependency_depths(sample_tree)
print(f"✓ Depth analysis - max depth: {depth_analysis.get('max_depth')}")
# Test _calculate_complexity_score
sample_summary = {"total_packages": 3, "max_depth": 1, "total_runtime_dependencies": 2}
sample_summary = {
"total_packages": 3,
"max_depth": 1,
"total_runtime_dependencies": 2,
}
complexity = _calculate_complexity_score(sample_summary)
print(f"✓ Complexity score: {complexity.get('score')} ({complexity.get('level')})")
print(
f"✓ Complexity score: {complexity.get('score')} ({complexity.get('level')})"
)
return True
except Exception as e:
print(f"✗ Error testing helper functions: {e}")
return False
@ -193,14 +198,14 @@ def main():
"""Run tests."""
print("Simple Test for Transitive Dependencies")
print("=" * 50)
results = []
results.append(test_formatting_functions())
results.append(test_helper_functions())
print("\n" + "=" * 50)
print(f"Test Results: {sum(results)}/{len(results)} passed")
if all(results):
print("✓ All formatting tests passed!")
return 0
@ -210,4 +215,4 @@ def main():
if __name__ == "__main__":
sys.exit(main())
sys.exit(main())

View File

@ -3,23 +3,25 @@
import asyncio
import logging
import sys
import os
import sys
# Add the project root to the Python path
sys.path.insert(0, os.path.dirname(__file__))
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def test_pypi_client():
"""Test the PyPIClient with version-specific queries."""
# Import only the core modules we need
from pypi_query_mcp.core.pypi_client import PyPIClient
from pypi_query_mcp.core.exceptions import PackageNotFoundError
from pypi_query_mcp.core.pypi_client import PyPIClient
async with PyPIClient() as client:
# Test 1: Django 4.2.0 (specific version)
logger.info("Testing Django 4.2.0...")
@ -27,19 +29,21 @@ async def test_pypi_client():
data = await client.get_package_info("django", version="4.2.0")
actual_version = data.get("info", {}).get("version", "")
if actual_version in ["4.2", "4.2.0"]: # PyPI may normalize version numbers
logger.info(f"✅ Django 4.2.0 test passed (got version: {actual_version})")
logger.info(
f"✅ Django 4.2.0 test passed (got version: {actual_version})"
)
else:
logger.error(f"❌ Expected version 4.2.0, got {actual_version}")
return False
# Check dependencies
deps = data.get("info", {}).get("requires_dist", [])
logger.info(f" Dependencies found: {len(deps) if deps else 0}")
except Exception as e:
logger.error(f"❌ Django 4.2.0 test failed: {e}")
return False
# Test 2: Latest Django (no version)
logger.info("Testing Django latest...")
try:
@ -49,7 +53,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ Django latest test failed: {e}")
return False
# Test 3: Non-existent version (should fail)
logger.info("Testing Django 999.999.999 (should fail)...")
try:
@ -61,7 +65,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ Unexpected error type: {e}")
return False
# Test 4: FastAPI 0.100.0
logger.info("Testing FastAPI 0.100.0...")
try:
@ -75,7 +79,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ FastAPI 0.100.0 test failed: {e}")
return False
# Test 5: NumPy 1.20.0
logger.info("Testing NumPy 1.20.0...")
try:
@ -89,14 +93,17 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ NumPy 1.20.0 test failed: {e}")
return False
return True
async def test_dependency_formatting():
"""Test the dependency formatting functions."""
from pypi_query_mcp.tools.package_query import format_dependency_info, validate_version_format
from pypi_query_mcp.tools.package_query import (
format_dependency_info,
validate_version_format,
)
# Test version validation
logger.info("Testing version validation...")
test_versions = [
@ -110,15 +117,17 @@ async def test_dependency_formatting():
("", False),
(None, True),
]
for version, expected in test_versions:
result = validate_version_format(version)
if result == expected:
logger.info(f"✅ Version validation for '{version}': {result}")
else:
logger.error(f"❌ Version validation for '{version}': expected {expected}, got {result}")
logger.error(
f"❌ Version validation for '{version}': expected {expected}, got {result}"
)
return False
# Test dependency formatting with mock data
logger.info("Testing dependency formatting...")
mock_data = {
@ -130,84 +139,97 @@ async def test_dependency_formatting():
"requests>=2.25.0",
"click>=8.0.0",
"pytest>=6.0.0; extra=='test'",
"black>=21.0.0; extra=='dev'"
]
"black>=21.0.0; extra=='dev'",
],
}
}
result = format_dependency_info(mock_data)
expected_fields = ["package_name", "version", "runtime_dependencies", "dependency_summary"]
expected_fields = [
"package_name",
"version",
"runtime_dependencies",
"dependency_summary",
]
for field in expected_fields:
if field not in result:
logger.error(f"❌ Missing field '{field}' in dependency formatting result")
return False
if len(result["runtime_dependencies"]) >= 2: # Should have requests and click
logger.info("✅ Dependency formatting test passed")
else:
logger.error(f"❌ Expected at least 2 runtime dependencies, got {len(result['runtime_dependencies'])}")
logger.error(
f"❌ Expected at least 2 runtime dependencies, got {len(result['runtime_dependencies'])}"
)
return False
return True
async def test_comparison():
"""Test that version-specific queries return different results than latest."""
from pypi_query_mcp.core.pypi_client import PyPIClient
logger.info("Testing that version-specific queries work differently than latest...")
async with PyPIClient() as client:
# Get Django latest
latest_data = await client.get_package_info("django", version=None)
latest_version = latest_data.get("info", {}).get("version", "")
# Get Django 4.2.0 specifically
specific_data = await client.get_package_info("django", version="4.2.0")
specific_version = specific_data.get("info", {}).get("version", "")
logger.info(f"Latest Django version: {latest_version}")
logger.info(f"Specific Django version: {specific_version}")
# They should be different (unless 4.2.0 happens to be latest, which is unlikely)
if specific_version in ["4.2", "4.2.0"] and latest_version != specific_version:
logger.info("✅ Version-specific query returns different version than latest")
logger.info(
"✅ Version-specific query returns different version than latest"
)
return True
elif specific_version in ["4.2", "4.2.0"]:
logger.info("⚠️ Specific version matches latest (this is fine, but less conclusive)")
logger.info(
"⚠️ Specific version matches latest (this is fine, but less conclusive)"
)
return True
else:
logger.error(f"❌ Specific version query failed: expected 4.2.0, got {specific_version}")
logger.error(
f"❌ Specific version query failed: expected 4.2.0, got {specific_version}"
)
return False
async def main():
"""Run all tests."""
logger.info("Starting PyPI client and dependency query tests...")
success = True
# Test PyPI client
if await test_pypi_client():
logger.info("✅ PyPI client tests passed")
else:
logger.error("❌ PyPI client tests failed")
success = False
# Test dependency formatting
if await test_dependency_formatting():
logger.info("✅ Dependency formatting tests passed")
else:
logger.error("❌ Dependency formatting tests failed")
success = False
# Test comparison
if await test_comparison():
logger.info("✅ Version comparison test passed")
else:
logger.error("❌ Version comparison test failed")
success = False
if success:
logger.info("🎉 All tests passed!")
return 0
@ -218,4 +240,4 @@ async def main():
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)
sys.exit(exit_code)

View File

@ -3,28 +3,32 @@
import asyncio
import logging
import sys
import os
import re
import httpx
import sys
from urllib.parse import quote
import httpx
# Add the project root to the Python path
sys.path.insert(0, os.path.dirname(__file__))
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
class SimplePackageNotFoundError(Exception):
"""Simple exception for package not found."""
pass
class SimplePyPIClient:
"""Simplified PyPI client for testing."""
def __init__(self):
self.base_url = "https://pypi.org/pypi"
self.client = httpx.AsyncClient(
@ -35,28 +39,30 @@ class SimplePyPIClient:
},
follow_redirects=True,
)
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.client.aclose()
async def get_package_info(self, package_name: str, version: str = None):
"""Get package info with optional version."""
if version:
url = f"{self.base_url}/{quote(package_name)}/{quote(version)}/json"
else:
url = f"{self.base_url}/{quote(package_name)}/json"
response = await self.client.get(url)
if response.status_code == 404:
if version:
raise SimplePackageNotFoundError(f"Version {version} not found for package {package_name}")
raise SimplePackageNotFoundError(
f"Version {version} not found for package {package_name}"
)
else:
raise SimplePackageNotFoundError(f"Package {package_name} not found")
response.raise_for_status()
return response.json()
@ -65,7 +71,7 @@ def validate_version_format(version: str | None) -> bool:
"""Validate version format."""
if version is None:
return True
version_pattern = r"^[0-9]+(?:\.[0-9]+)*(?:[\.\-]?(?:a|b|rc|alpha|beta|dev|pre|post|final)[0-9]*)*$"
return bool(re.match(version_pattern, version.strip(), re.IGNORECASE))
@ -73,49 +79,53 @@ def validate_version_format(version: str | None) -> bool:
async def test_version_parameter_fix():
"""Test the version parameter functionality."""
logger.info("Testing version parameter fix...")
async with SimplePyPIClient() as client:
# Test 1: Django 4.2.0 (specific version)
logger.info("Testing Django 4.2.0...")
try:
data = await client.get_package_info("django", "4.2.0")
actual_version = data.get("info", {}).get("version", "")
if actual_version in ["4.2", "4.2.0"]:
logger.info(f"✅ Django 4.2.0 test passed (got version: {actual_version})")
logger.info(
f"✅ Django 4.2.0 test passed (got version: {actual_version})"
)
# Check dependencies
deps = data.get("info", {}).get("requires_dist", [])
logger.info(f" Dependencies found: {len(deps) if deps else 0}")
# Print a few dependencies to show they're different from latest
if deps:
logger.info(f" Sample dependencies: {deps[:3]}")
else:
logger.error(f"❌ Expected version 4.2.0, got {actual_version}")
return False
except Exception as e:
logger.error(f"❌ Django 4.2.0 test failed: {e}")
return False
# Test 2: Django latest (no version)
logger.info("Testing Django latest...")
try:
data = await client.get_package_info("django")
latest_version = data.get("info", {}).get("version", "")
logger.info(f"✅ Django latest test passed - version: {latest_version}")
# Verify that latest != 4.2.0 (to prove we're getting different results)
if latest_version not in ["4.2", "4.2.0"]:
logger.info("✅ Confirmed: latest version is different from 4.2.0")
else:
logger.info(" Latest version happens to be 4.2.0 (unlikely but possible)")
logger.info(
" Latest version happens to be 4.2.0 (unlikely but possible)"
)
except Exception as e:
logger.error(f"❌ Django latest test failed: {e}")
return False
# Test 3: FastAPI 0.100.0
logger.info("Testing FastAPI 0.100.0...")
try:
@ -123,7 +133,7 @@ async def test_version_parameter_fix():
actual_version = data.get("info", {}).get("version", "")
if actual_version == "0.100.0":
logger.info("✅ FastAPI 0.100.0 test passed")
# Check dependencies
deps = data.get("info", {}).get("requires_dist", [])
logger.info(f" Dependencies found: {len(deps) if deps else 0}")
@ -133,7 +143,7 @@ async def test_version_parameter_fix():
except Exception as e:
logger.error(f"❌ FastAPI 0.100.0 test failed: {e}")
return False
# Test 4: NumPy 1.20.0
logger.info("Testing NumPy 1.20.0...")
try:
@ -141,7 +151,7 @@ async def test_version_parameter_fix():
actual_version = data.get("info", {}).get("version", "")
if actual_version == "1.20.0":
logger.info("✅ NumPy 1.20.0 test passed")
# Check dependencies
deps = data.get("info", {}).get("requires_dist", [])
logger.info(f" Dependencies found: {len(deps) if deps else 0}")
@ -151,7 +161,7 @@ async def test_version_parameter_fix():
except Exception as e:
logger.error(f"❌ NumPy 1.20.0 test failed: {e}")
return False
# Test 5: Non-existent version (should fail)
logger.info("Testing Django 999.999.999 (should fail)...")
try:
@ -163,7 +173,7 @@ async def test_version_parameter_fix():
except Exception as e:
logger.error(f"❌ Unexpected error type: {e}")
return False
# Test 6: Pre-release version
logger.info("Testing Django 5.0a1 (pre-release)...")
try:
@ -171,18 +181,20 @@ async def test_version_parameter_fix():
actual_version = data.get("info", {}).get("version", "")
logger.info(f"✅ Django 5.0a1 test passed - got version: {actual_version}")
except SimplePackageNotFoundError:
logger.info(" Django 5.0a1 not found (this is expected for some pre-release versions)")
logger.info(
" Django 5.0a1 not found (this is expected for some pre-release versions)"
)
except Exception as e:
logger.error(f"❌ Django 5.0a1 test failed: {e}")
return False
return True
def test_version_validation():
"""Test version validation."""
logger.info("Testing version validation...")
test_cases = [
("1.0.0", True),
("2.1", True),
@ -195,41 +207,45 @@ def test_version_validation():
("", False),
(None, True),
]
all_passed = True
for version, expected in test_cases:
result = validate_version_format(version)
if result == expected:
logger.info(f"✅ Version validation for '{version}': {result}")
else:
logger.error(f"❌ Version validation for '{version}': expected {expected}, got {result}")
logger.error(
f"❌ Version validation for '{version}': expected {expected}, got {result}"
)
all_passed = False
return all_passed
async def compare_dependencies():
"""Compare dependencies between different versions."""
logger.info("Comparing dependencies between Django versions...")
async with SimplePyPIClient() as client:
# Get Django 4.2.0 dependencies
data_420 = await client.get_package_info("django", "4.2.0")
deps_420 = data_420.get("info", {}).get("requires_dist", [])
# Get Django latest dependencies
data_latest = await client.get_package_info("django")
deps_latest = data_latest.get("info", {}).get("requires_dist", [])
logger.info(f"Django 4.2.0 dependencies: {len(deps_420) if deps_420 else 0}")
logger.info(f"Django latest dependencies: {len(deps_latest) if deps_latest else 0}")
logger.info(
f"Django latest dependencies: {len(deps_latest) if deps_latest else 0}"
)
# Show some dependencies for comparison
if deps_420:
logger.info(f"Django 4.2.0 sample deps: {deps_420[:2]}")
if deps_latest:
logger.info(f"Django latest sample deps: {deps_latest[:2]}")
# They might be the same if 4.2.0 is latest, but structure should be correct
return True
@ -237,32 +253,34 @@ async def compare_dependencies():
async def main():
"""Run all tests."""
logger.info("🧪 Starting version parameter fix verification tests...")
success = True
# Test version validation
if test_version_validation():
logger.info("✅ Version validation tests passed")
else:
logger.error("❌ Version validation tests failed")
success = False
# Test version parameter functionality
if await test_version_parameter_fix():
logger.info("✅ Version parameter fix tests passed")
else:
logger.error("❌ Version parameter fix tests failed")
success = False
# Compare dependencies
if await compare_dependencies():
logger.info("✅ Dependency comparison test passed")
else:
logger.error("❌ Dependency comparison test failed")
success = False
if success:
logger.info("🎉 All tests passed! The version parameter fix is working correctly.")
logger.info(
"🎉 All tests passed! The version parameter fix is working correctly."
)
logger.info("")
logger.info("Summary of what was fixed:")
logger.info("- PyPIClient now supports version-specific queries")
@ -277,4 +295,4 @@ async def main():
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)
sys.exit(exit_code)

View File

@ -4,8 +4,8 @@ Test script for the enhanced PyPI download statistics with fallback mechanisms.
"""
import asyncio
import sys
import os
import sys
# Add the package to Python path
sys.path.insert(0, os.path.abspath("."))
@ -33,62 +33,66 @@ async def test_download_stats():
try:
# Test recent downloads
stats = await get_package_download_stats(package_name, period="month")
print(f"Package: {stats.get('package')}")
print(f"Data Source: {stats.get('data_source')}")
print(f"Reliability: {stats.get('reliability', 'unknown')}")
if stats.get('warning'):
if stats.get("warning"):
print(f"⚠️ Warning: {stats['warning']}")
downloads = stats.get("downloads", {})
print(f"Downloads - Day: {downloads.get('last_day', 0):,}, " +
f"Week: {downloads.get('last_week', 0):,}, " +
f"Month: {downloads.get('last_month', 0):,}")
if stats.get('data_quality_note'):
print(
f"Downloads - Day: {downloads.get('last_day', 0):,}, "
+ f"Week: {downloads.get('last_week', 0):,}, "
+ f"Month: {downloads.get('last_month', 0):,}"
)
if stats.get("data_quality_note"):
print(f"Note: {stats['data_quality_note']}")
except Exception as e:
print(f"❌ Error: {e}")
print(f"\n📈 Testing download trends for 'requests':")
print("\n📈 Testing download trends for 'requests':")
print("-" * 50)
try:
trends = await get_package_download_trends("requests", include_mirrors=False)
print(f"Package: {trends.get('package')}")
print(f"Data Source: {trends.get('data_source')}")
print(f"Reliability: {trends.get('reliability', 'unknown')}")
if trends.get('warning'):
if trends.get("warning"):
print(f"⚠️ Warning: {trends['warning']}")
trend_analysis = trends.get("trend_analysis", {})
print(f"Data Points: {trend_analysis.get('data_points', 0)}")
print(f"Total Downloads: {trend_analysis.get('total_downloads', 0):,}")
print(f"Trend Direction: {trend_analysis.get('trend_direction', 'unknown')}")
if trends.get('data_quality_note'):
if trends.get("data_quality_note"):
print(f"Note: {trends['data_quality_note']}")
except Exception as e:
print(f"❌ Error: {e}")
print(f"\n🏆 Testing top packages:")
print("\n🏆 Testing top packages:")
print("-" * 50)
try:
top_packages = await get_top_packages_by_downloads(period="month", limit=5)
print(f"Data Source: {top_packages.get('data_source')}")
print(f"Reliability: {top_packages.get('reliability', 'unknown')}")
print(f"Success Rate: {top_packages.get('data_collection_success_rate', 'unknown')}")
if top_packages.get('warning'):
print(
f"Success Rate: {top_packages.get('data_collection_success_rate', 'unknown')}"
)
if top_packages.get("warning"):
print(f"⚠️ Warning: {top_packages['warning']}")
packages_list = top_packages.get("top_packages", [])
print(f"\nTop {len(packages_list)} packages:")
for package in packages_list[:5]:
@ -107,4 +111,4 @@ async def test_download_stats():
if __name__ == "__main__":
asyncio.run(test_download_stats())
asyncio.run(test_download_stats())

View File

@ -2,44 +2,56 @@
"""Test script for the improved get_top_packages_by_downloads function."""
import asyncio
from pypi_query_mcp.tools.download_stats import get_top_packages_by_downloads
async def test_improved():
try:
result = await get_top_packages_by_downloads('month', 10)
print('✅ Success! Result keys:', list(result.keys()))
print(f'Number of packages returned: {len(result.get("top_packages", []))}')
print(f'Data source: {result.get("data_source")}')
print(f'Methodology: {result.get("methodology")}')
print('\nTop 5 packages:')
for i, pkg in enumerate(result.get('top_packages', [])[:5]):
downloads = pkg.get('downloads', 0)
stars = pkg.get('github_stars', 'N/A')
estimated = '(estimated)' if pkg.get('estimated', False) else '(real)'
github_enhanced = ' 🌟' if pkg.get('github_enhanced', False) else ''
print(f'{i+1}. {pkg.get("package", "N/A")} - {downloads:,} downloads {estimated}{github_enhanced}')
if stars != 'N/A':
print(f' GitHub: {stars:,} stars, {pkg.get("category", "N/A")} category')
result = await get_top_packages_by_downloads("month", 10)
print("✅ Success! Result keys:", list(result.keys()))
print(f"Number of packages returned: {len(result.get('top_packages', []))}")
print(f"Data source: {result.get('data_source')}")
print(f"Methodology: {result.get('methodology')}")
print("\nTop 5 packages:")
for i, pkg in enumerate(result.get("top_packages", [])[:5]):
downloads = pkg.get("downloads", 0)
stars = pkg.get("github_stars", "N/A")
estimated = "(estimated)" if pkg.get("estimated", False) else "(real)"
github_enhanced = " 🌟" if pkg.get("github_enhanced", False) else ""
print(
f"{i + 1}. {pkg.get('package', 'N/A')} - {downloads:,} downloads {estimated}{github_enhanced}"
)
if stars != "N/A":
print(
f" GitHub: {stars:,} stars, {pkg.get('category', 'N/A')} category"
)
# Test different periods
print('\n--- Testing different periods ---')
for period in ['day', 'week', 'month']:
print("\n--- Testing different periods ---")
for period in ["day", "week", "month"]:
result = await get_top_packages_by_downloads(period, 3)
top_3 = result.get('top_packages', [])
print(f'{period}: {len(top_3)} packages, avg downloads: {sum(p.get("downloads", 0) for p in top_3) // max(len(top_3), 1):,}')
print('\n--- Testing different limits ---')
top_3 = result.get("top_packages", [])
print(
f"{period}: {len(top_3)} packages, avg downloads: {sum(p.get('downloads', 0) for p in top_3) // max(len(top_3), 1):,}"
)
print("\n--- Testing different limits ---")
for limit in [5, 20, 50]:
result = await get_top_packages_by_downloads('month', limit)
packages = result.get('top_packages', [])
real_count = len([p for p in packages if not p.get('estimated', False)])
print(f'Limit {limit}: {len(packages)} packages returned, {real_count} with real stats')
result = await get_top_packages_by_downloads("month", limit)
packages = result.get("top_packages", [])
real_count = len([p for p in packages if not p.get("estimated", False)])
print(
f"Limit {limit}: {len(packages)} packages returned, {real_count} with real stats"
)
except Exception as e:
print(f'❌ Error: {e}')
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
if __name__ == '__main__':
asyncio.run(test_improved())
if __name__ == "__main__":
asyncio.run(test_improved())

View File

@ -3,38 +3,43 @@
import asyncio
import logging
import sys
import os
import sys
# Add the project root to the Python path
sys.path.insert(0, os.path.dirname(__file__))
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def test_our_implementation():
"""Test our actual implementation directly."""
# Import just the core pieces we need
from pypi_query_mcp.core.exceptions import (
InvalidPackageNameError,
PackageNotFoundError,
)
from pypi_query_mcp.core.pypi_client import PyPIClient
from pypi_query_mcp.tools.package_query import (
query_package_dependencies,
format_dependency_info,
query_package_dependencies,
validate_version_format,
format_dependency_info
)
from pypi_query_mcp.core.exceptions import PackageNotFoundError, InvalidPackageNameError
logger.info("Testing our actual implementation...")
# Test 1: Version validation
logger.info("Testing version validation...")
assert validate_version_format("1.0.0") == True
assert validate_version_format("invalid!") == False
assert validate_version_format(None) == True
logger.info("✅ Version validation works correctly")
# Test 2: PyPI Client with version
logger.info("Testing PyPIClient with version parameter...")
async with PyPIClient() as client:
@ -42,82 +47,94 @@ async def test_our_implementation():
data = await client.get_package_info("django", version="4.2.0")
assert data["info"]["version"] in ["4.2", "4.2.0"]
logger.info(f"✅ Got Django 4.2.0: {data['info']['version']}")
# Test latest version
data = await client.get_package_info("django", version=None)
latest_version = data["info"]["version"]
logger.info(f"✅ Got Django latest: {latest_version}")
# Verify they're different (unless 4.2 is latest, which is unlikely)
if latest_version not in ["4.2", "4.2.0"]:
logger.info("✅ Confirmed version-specific queries work differently than latest")
logger.info(
"✅ Confirmed version-specific queries work differently than latest"
)
# Test 3: Dependency formatting
logger.info("Testing dependency formatting...")
async with PyPIClient() as client:
data = await client.get_package_info("django", version="4.2.0")
formatted = format_dependency_info(data)
assert "package_name" in formatted
assert "version" in formatted
assert "runtime_dependencies" in formatted
assert "dependency_summary" in formatted
assert formatted["version"] in ["4.2", "4.2.0"]
logger.info(f"✅ Dependency formatting works: {len(formatted['runtime_dependencies'])} runtime deps")
logger.info(
f"✅ Dependency formatting works: {len(formatted['runtime_dependencies'])} runtime deps"
)
# Test 4: Full query_package_dependencies function
logger.info("Testing query_package_dependencies function...")
# Test with Django 4.2.0
result = await query_package_dependencies("django", "4.2.0")
assert result["package_name"].lower() == "django"
assert result["version"] in ["4.2", "4.2.0"]
logger.info(f"✅ Django 4.2.0 dependencies: {len(result['runtime_dependencies'])} runtime deps")
logger.info(
f"✅ Django 4.2.0 dependencies: {len(result['runtime_dependencies'])} runtime deps"
)
# Test with Django latest
result_latest = await query_package_dependencies("django", None)
assert result_latest["package_name"].lower() == "django"
logger.info(f"✅ Django latest dependencies: {len(result_latest['runtime_dependencies'])} runtime deps")
logger.info(
f"✅ Django latest dependencies: {len(result_latest['runtime_dependencies'])} runtime deps"
)
# Verify they might be different
if result["version"] != result_latest["version"]:
logger.info("✅ Confirmed: version-specific query returns different version than latest")
logger.info(
"✅ Confirmed: version-specific query returns different version than latest"
)
# Test 5: Error cases
logger.info("Testing error cases...")
# Invalid version format
try:
await query_package_dependencies("django", "invalid!")
assert False, "Should have raised InvalidPackageNameError"
except InvalidPackageNameError:
logger.info("✅ Invalid version format correctly rejected")
# Non-existent version
try:
await query_package_dependencies("django", "999.999.999")
assert False, "Should have raised PackageNotFoundError"
except PackageNotFoundError:
logger.info("✅ Non-existent version correctly rejected")
# Test 6: Multiple packages
logger.info("Testing multiple packages...")
packages_and_versions = [
("fastapi", "0.100.0"),
("numpy", "1.20.0"),
("requests", "2.25.1"),
]
for package, version in packages_and_versions:
try:
result = await query_package_dependencies(package, version)
assert result["package_name"].lower() == package.lower()
assert result["version"] == version
logger.info(f"{package} {version}: {len(result['runtime_dependencies'])} runtime deps")
logger.info(
f"{package} {version}: {len(result['runtime_dependencies'])} runtime deps"
)
except Exception as e:
logger.warning(f"⚠️ {package} {version} failed (may not exist): {e}")
return True
@ -134,10 +151,11 @@ async def main():
except Exception as e:
logger.error(f"❌ Test failed with exception: {e}")
import traceback
traceback.print_exc()
return 1
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)
sys.exit(exit_code)

View File

@ -3,6 +3,7 @@
import asyncio
import logging
from pypi_query_mcp.tools.package_query import query_package_versions
# Configure logging
@ -15,39 +16,39 @@ async def test_real_package_versions():
print("=" * 60)
print("Testing Real Package Version Sorting")
print("=" * 60)
# Test packages known to have complex version histories
test_packages = [
"django", # Known for alpha, beta, rc versions
"numpy", # Long history with various formats
"requests" # Simple but well-known package
"django", # Known for alpha, beta, rc versions
"numpy", # Long history with various formats
"requests", # Simple but well-known package
]
for package_name in test_packages:
try:
print(f"\nTesting {package_name}:")
result = await query_package_versions(package_name)
recent_versions = result.get("recent_versions", [])[:10]
print(f" Recent versions (first 10): {recent_versions}")
# Show older-style string sorting for comparison
all_versions = result.get("versions", [])
if all_versions:
# Use basic string sorting (the old way)
string_sorted = sorted(all_versions[:20], reverse=True)
print(f" String-sorted (first 10): {string_sorted[:10]}")
print(f" Semantic vs String comparison:")
print(" Semantic vs String comparison:")
for i in range(min(5, len(recent_versions))):
semantic = recent_versions[i] if i < len(recent_versions) else "N/A"
string_sort = string_sorted[i] if i < len(string_sorted) else "N/A"
match = "" if semantic == string_sort else ""
print(f" {i+1}: {semantic} vs {string_sort} {match}")
print(f" {i + 1}: {semantic} vs {string_sort} {match}")
except Exception as e:
print(f" Error querying {package_name}: {e}")
print()
@ -56,50 +57,50 @@ async def test_specific_version_ordering():
print("=" * 60)
print("Specific Version Ordering Tests")
print("=" * 60)
# Let's test django which is known to have alpha, beta, rc versions
try:
print("Testing Django version ordering:")
result = await query_package_versions("django")
all_versions = result.get("versions", [])
# Find versions around a specific release to verify ordering
django_4_versions = [v for v in all_versions if v.startswith("4.2")][:15]
print(f" Django 4.2.x versions: {django_4_versions}")
# Check if pre-release versions are properly ordered
pre_release_pattern = ["4.2a1", "4.2b1", "4.2rc1", "4.2.0"]
found_versions = [v for v in django_4_versions if v in pre_release_pattern]
print(f" Found pre-release sequence: {found_versions}")
if len(found_versions) > 1:
print(" Checking pre-release ordering:")
for i in range(len(found_versions) - 1):
current = found_versions[i]
next_ver = found_versions[i + 1]
print(f" {current} comes before {next_ver}")
except Exception as e:
print(f" Error testing Django versions: {e}")
print()
async def main():
"""Main test function."""
print("Real Package Version Sorting Test")
print("="*60)
print("=" * 60)
# Test with real packages
await test_real_package_versions()
# Test specific version ordering scenarios
await test_specific_version_ordering()
print("=" * 60)
print("Real package test completed!")
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

View File

@ -3,22 +3,24 @@
import asyncio
import logging
import sys
import os
import sys
# Add the project root to the Python path
sys.path.insert(0, os.path.dirname(__file__))
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def test_pypi_client():
"""Test the PyPIClient with version-specific queries."""
from pypi_query_mcp.core.pypi_client import PyPIClient
from pypi_query_mcp.core.exceptions import PackageNotFoundError
from pypi_query_mcp.core.pypi_client import PyPIClient
async with PyPIClient() as client:
# Test 1: Django 4.2.0 (specific version)
logger.info("Testing Django 4.2.0...")
@ -32,15 +34,15 @@ async def test_pypi_client():
else:
logger.error(f"❌ Expected version 4.2.0, got {actual_version}")
return False
# Check dependencies
deps = data.get("info", {}).get("requires_dist", [])
logger.info(f" Dependencies found: {len(deps)}")
except Exception as e:
logger.error(f"❌ Django 4.2.0 test failed: {e}")
return False
# Test 2: Latest Django (no version)
logger.info("Testing Django latest...")
try:
@ -50,7 +52,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ Django latest test failed: {e}")
return False
# Test 3: Non-existent version (should fail)
logger.info("Testing Django 999.999.999 (should fail)...")
try:
@ -62,7 +64,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ Unexpected error type: {e}")
return False
# Test 4: FastAPI 0.100.0
logger.info("Testing FastAPI 0.100.0...")
try:
@ -76,7 +78,7 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ FastAPI 0.100.0 test failed: {e}")
return False
# Test 5: NumPy 1.20.0
logger.info("Testing NumPy 1.20.0...")
try:
@ -90,15 +92,20 @@ async def test_pypi_client():
except Exception as e:
logger.error(f"❌ NumPy 1.20.0 test failed: {e}")
return False
return True
async def test_dependency_query():
"""Test the query_package_dependencies function."""
from pypi_query_mcp.tools.package_query import query_package_dependencies, validate_version_format
from pypi_query_mcp.core.exceptions import InvalidPackageNameError, PackageNotFoundError
from pypi_query_mcp.core.exceptions import (
InvalidPackageNameError,
)
from pypi_query_mcp.tools.package_query import (
query_package_dependencies,
validate_version_format,
)
# Test version validation
logger.info("Testing version validation...")
test_versions = [
@ -112,30 +119,39 @@ async def test_dependency_query():
("", False),
(None, True),
]
for version, expected in test_versions:
result = validate_version_format(version)
if result == expected:
logger.info(f"✅ Version validation for '{version}': {result}")
else:
logger.error(f"❌ Version validation for '{version}': expected {expected}, got {result}")
logger.error(
f"❌ Version validation for '{version}': expected {expected}, got {result}"
)
return False
# Test dependency queries
logger.info("Testing dependency queries...")
# Test Django 4.2.0 dependencies
try:
result = await query_package_dependencies("django", "4.2.0")
if result["package_name"].lower() == "django" and result["version"] in ["4.2", "4.2.0"]:
logger.info(f"✅ Django 4.2.0 dependencies query passed - {len(result['runtime_dependencies'])} runtime deps")
if result["package_name"].lower() == "django" and result["version"] in [
"4.2",
"4.2.0",
]:
logger.info(
f"✅ Django 4.2.0 dependencies query passed - {len(result['runtime_dependencies'])} runtime deps"
)
else:
logger.error(f"❌ Django dependencies query failed - got {result['package_name']} v{result['version']}")
logger.error(
f"❌ Django dependencies query failed - got {result['package_name']} v{result['version']}"
)
return False
except Exception as e:
logger.error(f"❌ Django dependencies query failed: {e}")
return False
# Test invalid version format
try:
result = await query_package_dependencies("django", "invalid.version!")
@ -146,30 +162,30 @@ async def test_dependency_query():
except Exception as e:
logger.error(f"❌ Unexpected error for invalid version: {e}")
return False
return True
async def main():
"""Run all tests."""
logger.info("Starting PyPI client and dependency query tests...")
success = True
# Test PyPI client
if await test_pypi_client():
logger.info("✅ PyPI client tests passed")
else:
logger.error("❌ PyPI client tests failed")
success = False
# Test dependency queries
if await test_dependency_query():
logger.info("✅ Dependency query tests passed")
else:
logger.error("❌ Dependency query tests failed")
success = False
if success:
logger.info("🎉 All tests passed!")
return 0
@ -180,4 +196,4 @@ async def main():
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)
sys.exit(exit_code)

View File

@ -3,58 +3,65 @@
from pypi_query_mcp.core.version_utils import sort_versions_semantically
def test_specific_case():
"""Test the exact case mentioned in the task requirements."""
print("=" * 60)
print("Testing Specific Task Requirement")
print("=" * 60)
# The exact problem mentioned in the task
versions = ["5.2rc1", "5.2.5"]
# Old way (string sorting)
old_sorted = sorted(versions, reverse=True)
# New way (semantic sorting)
new_sorted = sort_versions_semantically(versions, reverse=True)
print(f"Original versions: {versions}")
print(f"Old string sorting: {old_sorted}")
print(f"New semantic sorting: {new_sorted}")
print()
print("Analysis:")
print(f" Problem: '5.2rc1' was appearing before '5.2.5' in string sorting")
print(" Problem: '5.2rc1' was appearing before '5.2.5' in string sorting")
print(f" String sorting result: {old_sorted[0]} comes first")
print(f" Semantic sorting result: {new_sorted[0]} comes first")
print()
if new_sorted == ["5.2.5", "5.2rc1"]:
print(" ✅ SUCCESS: Semantic sorting correctly places 5.2.5 before 5.2rc1")
print(" ✅ This fixes the issue described in the task!")
else:
print(" ❌ FAILED: The issue is not resolved")
print()
# Test a more comprehensive example
comprehensive_test = [
"5.2.5", "5.2rc1", "5.2.0", "5.2a1", "5.2b1",
"5.1.0", "5.3.0", "5.2.1"
"5.2.5",
"5.2rc1",
"5.2.0",
"5.2a1",
"5.2b1",
"5.1.0",
"5.3.0",
"5.2.1",
]
old_comprehensive = sorted(comprehensive_test, reverse=True)
new_comprehensive = sort_versions_semantically(comprehensive_test, reverse=True)
print("Comprehensive version sorting test:")
print(f" Input: {comprehensive_test}")
print(f" String sorted: {old_comprehensive}")
print(f" Semantic sorted: {new_comprehensive}")
print()
print("Expected semantic order (newest to oldest):")
print(" 5.3.0 > 5.2.5 > 5.2.1 > 5.2.0 > 5.2rc1 > 5.2b1 > 5.2a1 > 5.1.0")
if __name__ == "__main__":
test_specific_case()
test_specific_case()

View File

@ -3,7 +3,7 @@
import asyncio
import sys
import json
from pypi_query_mcp.tools.package_query import query_package_dependencies
@ -12,10 +12,14 @@ async def test_direct_dependencies():
print("Testing direct dependencies for 'requests'...")
try:
result = await query_package_dependencies("requests", include_transitive=False)
print(f"✓ Direct dependencies found: {len(result.get('runtime_dependencies', []))}")
print(
f"✓ Direct dependencies found: {len(result.get('runtime_dependencies', []))}"
)
print(f" Package: {result.get('package_name')}")
print(f" Version: {result.get('version')}")
print(f" Runtime deps: {result.get('runtime_dependencies', [])[:3]}...") # Show first 3
print(
f" Runtime deps: {result.get('runtime_dependencies', [])[:3]}..."
) # Show first 3
return True
except Exception as e:
print(f"✗ Error testing direct dependencies: {e}")
@ -27,44 +31,44 @@ async def test_transitive_dependencies():
print("\nTesting transitive dependencies for 'requests'...")
try:
result = await query_package_dependencies(
"requests",
include_transitive=True,
max_depth=3,
python_version="3.10"
"requests", include_transitive=True, max_depth=3, python_version="3.10"
)
print(f"✓ Transitive analysis completed")
print("✓ Transitive analysis completed")
print(f" Include transitive: {result.get('include_transitive')}")
print(f" Package: {result.get('package_name')}")
print(f" Version: {result.get('version')}")
# Check transitive dependency structure
transitive = result.get('transitive_dependencies', {})
all_packages = transitive.get('all_packages', {})
transitive = result.get("transitive_dependencies", {})
all_packages = transitive.get("all_packages", {})
print(f" Total packages in tree: {len(all_packages)}")
# Check summary
summary = result.get('dependency_summary', {})
summary = result.get("dependency_summary", {})
print(f" Direct runtime deps: {summary.get('direct_runtime_count', 0)}")
print(f" Total transitive packages: {summary.get('total_transitive_packages', 0)}")
print(
f" Total transitive packages: {summary.get('total_transitive_packages', 0)}"
)
print(f" Max depth: {summary.get('max_dependency_depth', 0)}")
# Check analysis
analysis = result.get('analysis', {})
performance = analysis.get('performance_impact', {})
analysis = result.get("analysis", {})
performance = analysis.get("performance_impact", {})
print(f" Performance level: {performance.get('performance_level', 'unknown')}")
complexity = summary.get('complexity_score', {})
complexity = summary.get("complexity_score", {})
print(f" Complexity level: {complexity.get('level', 'unknown')}")
# Check circular dependencies
circular = transitive.get('circular_dependencies', [])
circular = transitive.get("circular_dependencies", [])
print(f" Circular dependencies found: {len(circular)}")
return True
except Exception as e:
print(f"✗ Error testing transitive dependencies: {e}")
import traceback
traceback.print_exc()
return False
@ -74,19 +78,17 @@ async def test_small_package():
print("\nTesting transitive dependencies for 'six' (smaller package)...")
try:
result = await query_package_dependencies(
"six",
include_transitive=True,
max_depth=2
"six", include_transitive=True, max_depth=2
)
transitive = result.get('transitive_dependencies', {})
all_packages = transitive.get('all_packages', {})
print(f"✓ Analysis completed for 'six'")
transitive = result.get("transitive_dependencies", {})
all_packages = transitive.get("all_packages", {})
print("✓ Analysis completed for 'six'")
print(f" Total packages: {len(all_packages)}")
summary = result.get('dependency_summary', {})
summary = result.get("dependency_summary", {})
print(f" Direct runtime deps: {summary.get('direct_runtime_count', 0)}")
return True
except Exception as e:
print(f"✗ Error testing 'six': {e}")
@ -97,21 +99,21 @@ async def main():
"""Run all tests."""
print("Testing PyPI Query MCP Server - Transitive Dependencies")
print("=" * 60)
results = []
# Test 1: Direct dependencies (existing functionality)
results.append(await test_direct_dependencies())
# Test 2: Transitive dependencies (new functionality)
results.append(await test_transitive_dependencies())
# Test 3: Small package test
results.append(await test_small_package())
print("\n" + "=" * 60)
print(f"Test Results: {sum(results)}/{len(results)} passed")
if all(results):
print("✓ All tests passed! Transitive dependency functionality is working.")
return 0
@ -121,4 +123,4 @@ async def main():
if __name__ == "__main__":
sys.exit(asyncio.run(main()))
sys.exit(asyncio.run(main()))

View File

@ -3,95 +3,114 @@
import asyncio
import logging
import sys
import os
import sys
# Add the project root to the Python path
sys.path.insert(0, os.path.dirname(__file__))
from pypi_query_mcp.tools.package_query import query_package_dependencies
from pypi_query_mcp.core.exceptions import PackageNotFoundError, InvalidPackageNameError
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
async def test_package_version(package_name: str, version: str = None, expect_error: bool = False):
async def test_package_version(
package_name: str, version: str = None, expect_error: bool = False
):
"""Test a specific package and version combination."""
version_str = f" version {version}" if version else " (latest)"
logger.info(f"Testing {package_name}{version_str}")
try:
result = await query_package_dependencies(package_name, version)
if expect_error:
logger.error(f"Expected error for {package_name}{version_str}, but got result")
logger.error(
f"Expected error for {package_name}{version_str}, but got result"
)
return False
# Verify the result contains expected fields
required_fields = ["package_name", "version", "runtime_dependencies", "dependency_summary"]
required_fields = [
"package_name",
"version",
"runtime_dependencies",
"dependency_summary",
]
for field in required_fields:
if field not in result:
logger.error(f"Missing field '{field}' in result for {package_name}{version_str}")
logger.error(
f"Missing field '{field}' in result for {package_name}{version_str}"
)
return False
# Check if we got the correct version
actual_version = result.get("version", "")
if version and actual_version != version:
logger.error(f"Expected version {version}, got {actual_version} for {package_name}")
logger.error(
f"Expected version {version}, got {actual_version} for {package_name}"
)
return False
logger.info(f"✅ Success: {package_name}{version_str} - Got version {actual_version}")
logger.info(
f"✅ Success: {package_name}{version_str} - Got version {actual_version}"
)
logger.info(f" Runtime dependencies: {len(result['runtime_dependencies'])}")
logger.info(f" Total dependencies: {result['dependency_summary']['runtime_count']}")
logger.info(
f" Total dependencies: {result['dependency_summary']['runtime_count']}"
)
return True
except Exception as e:
if expect_error:
logger.info(f"✅ Expected error for {package_name}{version_str}: {type(e).__name__}: {e}")
logger.info(
f"✅ Expected error for {package_name}{version_str}: {type(e).__name__}: {e}"
)
return True
else:
logger.error(f"❌ Unexpected error for {package_name}{version_str}: {type(e).__name__}: {e}")
logger.error(
f"❌ Unexpected error for {package_name}{version_str}: {type(e).__name__}: {e}"
)
return False
async def main():
"""Run all tests."""
logger.info("Starting version parameter fix tests...")
tests = [
# Test with valid package versions
("django", "4.2.0", False),
("fastapi", "0.100.0", False),
("numpy", "1.20.0", False),
# Test latest versions (no version specified)
("requests", None, False),
("click", None, False),
# Test edge cases - should fail
("django", "999.999.999", True), # Non-existent version
("nonexistent-package-12345", None, True), # Non-existent package
("django", "invalid.version.format!", True), # Invalid version format
# Test pre-release versions
("django", "5.0a1", False), # Pre-release (may or may not exist)
]
passed = 0
total = len(tests)
for package, version, expect_error in tests:
try:
if await test_package_version(package, version, expect_error):
passed += 1
except Exception as e:
logger.error(f"Test framework error: {e}")
logger.info(f"\nTest Results: {passed}/{total} tests passed")
if passed == total:
logger.info("🎉 All tests passed!")
return 0
@ -102,4 +121,4 @@ async def main():
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)
sys.exit(exit_code)

View File

@ -3,6 +3,7 @@
import asyncio
import logging
from pypi_query_mcp.core.version_utils import sort_versions_semantically
from pypi_query_mcp.tools.package_query import query_package_versions
@ -16,55 +17,75 @@ def test_semantic_version_sorting():
print("=" * 60)
print("Testing Semantic Version Sorting Function")
print("=" * 60)
# Test case 1: Basic pre-release ordering
test1_versions = ["5.2rc1", "5.2.5", "5.2.0", "5.2a1", "5.2b1"]
sorted1 = sort_versions_semantically(test1_versions)
print(f"Test 1 - Pre-release ordering:")
print("Test 1 - Pre-release ordering:")
print(f" Input: {test1_versions}")
print(f" Output: {sorted1}")
print(f" Expected: ['5.2.5', '5.2.0', '5.2rc1', '5.2b1', '5.2a1']")
print(" Expected: ['5.2.5', '5.2.0', '5.2rc1', '5.2b1', '5.2a1']")
print()
# Test case 2: Complex Django-like versions
test2_versions = [
"4.2.0", "4.2a1", "4.2b1", "4.2rc1", "4.1.0", "4.1.7",
"4.0.0", "3.2.18", "4.2.1", "4.2.2"
"4.2.0",
"4.2a1",
"4.2b1",
"4.2rc1",
"4.1.0",
"4.1.7",
"4.0.0",
"3.2.18",
"4.2.1",
"4.2.2",
]
sorted2 = sort_versions_semantically(test2_versions)
print(f"Test 2 - Django-like versions:")
print("Test 2 - Django-like versions:")
print(f" Input: {test2_versions}")
print(f" Output: {sorted2}")
print()
# Test case 3: TensorFlow-like versions with dev builds
test3_versions = [
"2.13.0", "2.13.0rc1", "2.13.0rc0", "2.12.0", "2.12.1",
"2.14.0dev20230517", "2.13.0rc2" # This might not parse correctly
"2.13.0",
"2.13.0rc1",
"2.13.0rc0",
"2.12.0",
"2.12.1",
"2.14.0dev20230517",
"2.13.0rc2", # This might not parse correctly
]
sorted3 = sort_versions_semantically(test3_versions)
print(f"Test 3 - TensorFlow-like versions:")
print("Test 3 - TensorFlow-like versions:")
print(f" Input: {test3_versions}")
print(f" Output: {sorted3}")
print()
# Test case 4: Edge cases and malformed versions
test4_versions = [
"1.0.0", "1.0.0.post1", "1.0.0.dev0", "1.0.0a1", "1.0.0b1",
"1.0.0rc1", "1.0.1", "invalid-version", "1.0"
"1.0.0",
"1.0.0.post1",
"1.0.0.dev0",
"1.0.0a1",
"1.0.0b1",
"1.0.0rc1",
"1.0.1",
"invalid-version",
"1.0",
]
sorted4 = sort_versions_semantically(test4_versions)
print(f"Test 4 - Edge cases and malformed versions:")
print("Test 4 - Edge cases and malformed versions:")
print(f" Input: {test4_versions}")
print(f" Output: {sorted4}")
print()
# Test case 5: Empty and single item lists
test5_empty = []
test5_single = ["1.0.0"]
sorted5_empty = sort_versions_semantically(test5_empty)
sorted5_single = sort_versions_semantically(test5_single)
print(f"Test 5 - Edge cases:")
print("Test 5 - Edge cases:")
print(f" Empty list: {sorted5_empty}")
print(f" Single item: {sorted5_single}")
print()
@ -75,30 +96,30 @@ async def test_real_package_versions():
print("=" * 60)
print("Testing Real Package Version Sorting")
print("=" * 60)
# Test packages known to have complex version histories
test_packages = [
"django", # Known for alpha, beta, rc versions
"django", # Known for alpha, beta, rc versions
"tensorflow", # Complex versioning with dev builds
"numpy", # Long history with various formats
"requests" # Simple but well-known package
"numpy", # Long history with various formats
"requests", # Simple but well-known package
]
for package_name in test_packages:
try:
print(f"\nTesting {package_name}:")
result = await query_package_versions(package_name)
recent_versions = result.get("recent_versions", [])[:10]
print(f" Recent versions (first 10): {recent_versions}")
# Check if versions seem to be properly sorted
if len(recent_versions) >= 3:
print(f" First three versions: {recent_versions[:3]}")
except Exception as e:
print(f" Error querying {package_name}: {e}")
print()
@ -107,58 +128,58 @@ def validate_sorting_correctness():
print("=" * 60)
print("Validation Tests")
print("=" * 60)
# The specific example from the task: "5.2rc1" should come after "5.2.5"
task_example = ["5.2rc1", "5.2.5"]
sorted_task = sort_versions_semantically(task_example)
print("Task requirement validation:")
print(f" Input: {task_example}")
print(f" Output: {sorted_task}")
print(f" Requirement: '5.2rc1' should come after '5.2.5'")
print(" Requirement: '5.2rc1' should come after '5.2.5'")
if sorted_task == ["5.2.5", "5.2rc1"]:
print(" ✅ PASS: Requirement met!")
else:
print(" ❌ FAIL: Requirement not met!")
print()
# Test pre-release ordering: alpha < beta < rc < stable
pre_release_test = ["1.0.0", "1.0.0rc1", "1.0.0b1", "1.0.0a1"]
sorted_pre = sort_versions_semantically(pre_release_test)
print("Pre-release ordering validation:")
print(f" Input: {pre_release_test}")
print(f" Output: {sorted_pre}")
print(f" Expected order: stable > rc > beta > alpha")
print(" Expected order: stable > rc > beta > alpha")
expected_order = ["1.0.0", "1.0.0rc1", "1.0.0b1", "1.0.0a1"]
if sorted_pre == expected_order:
print(" ✅ PASS: Pre-release ordering correct!")
else:
print(" ❌ FAIL: Pre-release ordering incorrect!")
print()
async def main():
"""Main test function."""
print("Semantic Version Sorting Test Suite")
print("="*60)
print("=" * 60)
# Run unit tests
test_semantic_version_sorting()
# Validate specific requirements
validate_sorting_correctness()
# Test with real packages
await test_real_package_versions()
print("=" * 60)
print("Test suite completed!")
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

View File

@ -2,7 +2,8 @@
"""Standalone test script to verify semantic version sorting functionality."""
import logging
from packaging.version import Version, InvalidVersion
from packaging.version import InvalidVersion, Version
# Configure logging
logging.basicConfig(level=logging.INFO)
@ -11,31 +12,31 @@ logger = logging.getLogger(__name__)
def sort_versions_semantically(versions: list[str], reverse: bool = True) -> list[str]:
"""Sort package versions using semantic version ordering.
This function properly sorts versions by parsing them as semantic versions,
ensuring that pre-release versions (alpha, beta, rc) are ordered correctly
relative to stable releases.
Args:
versions: List of version strings to sort
reverse: If True, sort in descending order (newest first). Default True.
Returns:
List of version strings sorted semantically
Examples:
>>> sort_versions_semantically(['1.0.0', '2.0.0a1', '1.5.0', '2.0.0'])
['2.0.0', '2.0.0a1', '1.5.0', '1.0.0']
>>> sort_versions_semantically(['5.2rc1', '5.2.5', '5.2.0'])
['5.2.5', '5.2.0', '5.2rc1']
"""
if not versions:
return []
def parse_version_safe(version_str: str) -> tuple[Version | None, str]:
"""Safely parse a version string, returning (parsed_version, original_string).
Returns (None, original_string) if parsing fails.
"""
try:
@ -43,28 +44,28 @@ def sort_versions_semantically(versions: list[str], reverse: bool = True) -> lis
except InvalidVersion:
logger.debug(f"Failed to parse version '{version_str}' as semantic version")
return (None, version_str)
# Parse all versions, keeping track of originals
parsed_versions = [parse_version_safe(v) for v in versions]
# Separate valid and invalid versions
valid_versions = [(v, orig) for v, orig in parsed_versions if v is not None]
invalid_versions = [orig for v, orig in parsed_versions if v is None]
# Sort valid versions semantically
valid_versions.sort(key=lambda x: x[0], reverse=reverse)
# Sort invalid versions lexicographically as fallback
invalid_versions.sort(reverse=reverse)
# Combine results: valid versions first, then invalid ones
result = [orig for _, orig in valid_versions] + invalid_versions
logger.debug(
f"Sorted {len(versions)} versions: {len(valid_versions)} valid, "
f"{len(invalid_versions)} invalid"
)
return result
@ -73,55 +74,75 @@ def test_semantic_version_sorting():
print("=" * 60)
print("Testing Semantic Version Sorting Function")
print("=" * 60)
# Test case 1: Basic pre-release ordering
test1_versions = ["5.2rc1", "5.2.5", "5.2.0", "5.2a1", "5.2b1"]
sorted1 = sort_versions_semantically(test1_versions)
print(f"Test 1 - Pre-release ordering:")
print("Test 1 - Pre-release ordering:")
print(f" Input: {test1_versions}")
print(f" Output: {sorted1}")
print(f" Expected: ['5.2.5', '5.2.0', '5.2rc1', '5.2b1', '5.2a1']")
print(" Expected: ['5.2.5', '5.2.0', '5.2rc1', '5.2b1', '5.2a1']")
print()
# Test case 2: Complex Django-like versions
test2_versions = [
"4.2.0", "4.2a1", "4.2b1", "4.2rc1", "4.1.0", "4.1.7",
"4.0.0", "3.2.18", "4.2.1", "4.2.2"
"4.2.0",
"4.2a1",
"4.2b1",
"4.2rc1",
"4.1.0",
"4.1.7",
"4.0.0",
"3.2.18",
"4.2.1",
"4.2.2",
]
sorted2 = sort_versions_semantically(test2_versions)
print(f"Test 2 - Django-like versions:")
print("Test 2 - Django-like versions:")
print(f" Input: {test2_versions}")
print(f" Output: {sorted2}")
print()
# Test case 3: TensorFlow-like versions with dev builds
test3_versions = [
"2.13.0", "2.13.0rc1", "2.13.0rc0", "2.12.0", "2.12.1",
"2.14.0dev20230517", "2.13.0rc2" # This might not parse correctly
"2.13.0",
"2.13.0rc1",
"2.13.0rc0",
"2.12.0",
"2.12.1",
"2.14.0dev20230517",
"2.13.0rc2", # This might not parse correctly
]
sorted3 = sort_versions_semantically(test3_versions)
print(f"Test 3 - TensorFlow-like versions:")
print("Test 3 - TensorFlow-like versions:")
print(f" Input: {test3_versions}")
print(f" Output: {sorted3}")
print()
# Test case 4: Edge cases and malformed versions
test4_versions = [
"1.0.0", "1.0.0.post1", "1.0.0.dev0", "1.0.0a1", "1.0.0b1",
"1.0.0rc1", "1.0.1", "invalid-version", "1.0"
"1.0.0",
"1.0.0.post1",
"1.0.0.dev0",
"1.0.0a1",
"1.0.0b1",
"1.0.0rc1",
"1.0.1",
"invalid-version",
"1.0",
]
sorted4 = sort_versions_semantically(test4_versions)
print(f"Test 4 - Edge cases and malformed versions:")
print("Test 4 - Edge cases and malformed versions:")
print(f" Input: {test4_versions}")
print(f" Output: {sorted4}")
print()
# Test case 5: Empty and single item lists
test5_empty = []
test5_single = ["1.0.0"]
sorted5_empty = sort_versions_semantically(test5_empty)
sorted5_single = sort_versions_semantically(test5_single)
print(f"Test 5 - Edge cases:")
print("Test 5 - Edge cases:")
print(f" Empty list: {sorted5_empty}")
print(f" Single item: {sorted5_single}")
print()
@ -132,38 +153,38 @@ def validate_sorting_correctness():
print("=" * 60)
print("Validation Tests")
print("=" * 60)
# The specific example from the task: "5.2rc1" should come after "5.2.5"
task_example = ["5.2rc1", "5.2.5"]
sorted_task = sort_versions_semantically(task_example)
print("Task requirement validation:")
print(f" Input: {task_example}")
print(f" Output: {sorted_task}")
print(f" Requirement: '5.2rc1' should come after '5.2.5'")
print(" Requirement: '5.2rc1' should come after '5.2.5'")
if sorted_task == ["5.2.5", "5.2rc1"]:
print(" ✅ PASS: Requirement met!")
else:
print(" ❌ FAIL: Requirement not met!")
print()
# Test pre-release ordering: alpha < beta < rc < stable
pre_release_test = ["1.0.0", "1.0.0rc1", "1.0.0b1", "1.0.0a1"]
sorted_pre = sort_versions_semantically(pre_release_test)
print("Pre-release ordering validation:")
print(f" Input: {pre_release_test}")
print(f" Output: {sorted_pre}")
print(f" Expected order: stable > rc > beta > alpha")
print(" Expected order: stable > rc > beta > alpha")
expected_order = ["1.0.0", "1.0.0rc1", "1.0.0b1", "1.0.0a1"]
if sorted_pre == expected_order:
print(" ✅ PASS: Pre-release ordering correct!")
else:
print(" ❌ FAIL: Pre-release ordering incorrect!")
print()
@ -172,7 +193,7 @@ def test_version_comparison_details():
print("=" * 60)
print("Version Comparison Details")
print("=" * 60)
test_versions = [
("1.0.0", "1.0.0a1"),
("1.0.0", "1.0.0b1"),
@ -184,7 +205,7 @@ def test_version_comparison_details():
("1.0.0.post1", "1.0.0"),
("1.0.0.dev0", "1.0.0"),
]
for v1, v2 in test_versions:
try:
ver1 = Version(v1)
@ -193,27 +214,27 @@ def test_version_comparison_details():
print(f" {v1} {comparison} {v2}")
except Exception as e:
print(f" Error comparing {v1} and {v2}: {e}")
print()
def main():
"""Main test function."""
print("Semantic Version Sorting Test Suite")
print("="*60)
print("=" * 60)
# Run unit tests
test_semantic_version_sorting()
# Validate specific requirements
validate_sorting_correctness()
# Show detailed version comparisons
test_version_comparison_details()
print("=" * 60)
print("Test suite completed!")
if __name__ == "__main__":
main()
main()

View File

@ -99,7 +99,7 @@ class TestDependencyResolver:
"requires_dist": [],
}
}
mock_pytest_data = {
"info": {
"name": "pytest",
@ -112,7 +112,7 @@ class TestDependencyResolver:
with patch("pypi_query_mcp.core.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
# Setup mock to return different data based on package name
def mock_get_package_info(package_name):
if package_name.lower() == "mock-test-package-12345":
@ -122,8 +122,14 @@ class TestDependencyResolver:
elif package_name.lower() == "pytest":
return mock_pytest_data
else:
return {"info": {"name": package_name, "version": "1.0.0", "requires_dist": []}}
return {
"info": {
"name": package_name,
"version": "1.0.0",
"requires_dist": [],
}
}
mock_client.get_package_info.side_effect = mock_get_package_info
result = await resolver.resolve_dependencies(
@ -132,7 +138,7 @@ class TestDependencyResolver:
assert result["include_extras"] == ["test"]
assert "dependency_tree" in result
# Verify that extras are properly resolved and included
assert result["summary"]["total_extra_dependencies"] == 1
main_pkg = result["dependency_tree"]["mock-test-package-12345"]
@ -166,7 +172,7 @@ class TestDependencyResolver:
"requires_dist": [],
}
}
mock_pytest_data = {
"info": {
"name": "pytest",
@ -175,7 +181,7 @@ class TestDependencyResolver:
"requires_dist": [],
}
}
mock_coverage_data = {
"info": {
"name": "coverage",
@ -188,7 +194,7 @@ class TestDependencyResolver:
with patch("pypi_query_mcp.core.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
# Setup mock to return different data based on package name
def mock_get_package_info(package_name):
if package_name.lower() == "test-package":
@ -200,24 +206,33 @@ class TestDependencyResolver:
elif package_name.lower() == "coverage":
return mock_coverage_data
else:
return {"info": {"name": package_name, "version": "1.0.0", "requires_dist": []}}
return {
"info": {
"name": package_name,
"version": "1.0.0",
"requires_dist": [],
}
}
mock_client.get_package_info.side_effect = mock_get_package_info
# Test with Python 3.11 - should not include typing-extensions but should include extras
result = await resolver.resolve_dependencies(
"test-package", python_version="3.11", include_extras=["test"], max_depth=2
"test-package",
python_version="3.11",
include_extras=["test"],
max_depth=2,
)
assert result["include_extras"] == ["test"]
assert result["python_version"] == "3.11"
# Verify that extras are properly resolved
assert result["summary"]["total_extra_dependencies"] == 2
main_pkg = result["dependency_tree"]["test-package"]
assert "test" in main_pkg["dependencies"]["extras"]
assert len(main_pkg["dependencies"]["extras"]["test"]) == 2
# Verify Python version filtering worked for runtime deps but not extras
runtime_deps = main_pkg["dependencies"]["runtime"]
assert len(runtime_deps) == 1 # Only requests, not typing-extensions

View File

@ -159,7 +159,7 @@ class TestDownloadStats:
async def test_get_top_packages_by_downloads_fallback(self):
"""Test top packages retrieval when PyPI API fails (fallback mode)."""
from pypi_query_mcp.core.exceptions import PyPIServerError
with patch(
"pypi_query_mcp.tools.download_stats.PyPIStatsClient"
) as mock_stats_client:
@ -180,7 +180,7 @@ class TestDownloadStats:
assert all("category" in pkg for pkg in result["top_packages"])
assert all("description" in pkg for pkg in result["top_packages"])
assert "curated" in result["data_source"]
# Check that all packages have estimated downloads
assert all(pkg.get("estimated", False) for pkg in result["top_packages"])
@ -188,47 +188,56 @@ class TestDownloadStats:
async def test_get_top_packages_github_enhancement(self):
"""Test GitHub enhancement functionality."""
from pypi_query_mcp.core.exceptions import PyPIServerError
mock_github_stats = {
"stars": 50000,
"forks": 5000,
"updated_at": "2024-01-01T00:00:00Z",
"language": "Python",
"topics": ["http", "requests"]
"topics": ["http", "requests"],
}
with (
patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as mock_stats_client,
patch("pypi_query_mcp.tools.download_stats.GitHubAPIClient") as mock_github_client
patch(
"pypi_query_mcp.tools.download_stats.PyPIStatsClient"
) as mock_stats_client,
patch(
"pypi_query_mcp.tools.download_stats.GitHubAPIClient"
) as mock_github_client,
):
# Mock PyPI failure
mock_stats_instance = AsyncMock()
mock_stats_instance.get_recent_downloads.side_effect = PyPIServerError(502)
mock_stats_client.return_value.__aenter__.return_value = mock_stats_instance
# Mock GitHub success
# Mock GitHub success
mock_github_instance = AsyncMock()
mock_github_instance.get_multiple_repo_stats.return_value = {
"psf/requests": mock_github_stats
}
mock_github_client.return_value.__aenter__.return_value = mock_github_instance
mock_github_client.return_value.__aenter__.return_value = (
mock_github_instance
)
result = await get_top_packages_by_downloads("month", 10)
# Find requests package (should be enhanced with GitHub data)
requests_pkg = next((pkg for pkg in result["top_packages"] if pkg["package"] == "requests"), None)
requests_pkg = next(
(pkg for pkg in result["top_packages"] if pkg["package"] == "requests"),
None,
)
if requests_pkg:
assert "github_stars" in requests_pkg
assert "github_forks" in requests_pkg
assert requests_pkg["github_stars"] == 50000
assert requests_pkg.get("github_enhanced", False) == True
@pytest.mark.asyncio
@pytest.mark.asyncio
async def test_get_top_packages_different_periods(self):
"""Test top packages with different time periods."""
from pypi_query_mcp.core.exceptions import PyPIServerError
with patch(
"pypi_query_mcp.tools.download_stats.PyPIStatsClient"
) as mock_stats_client:
@ -238,16 +247,20 @@ class TestDownloadStats:
for period in ["day", "week", "month"]:
result = await get_top_packages_by_downloads(period, 3)
assert result["period"] == period
assert len(result["top_packages"]) == 3
# Check that downloads are scaled appropriately for the period
# Day should have much smaller numbers than month
if period == "day":
assert all(pkg["downloads"] < 50_000_000 for pkg in result["top_packages"])
assert all(
pkg["downloads"] < 50_000_000 for pkg in result["top_packages"]
)
elif period == "month":
assert any(pkg["downloads"] > 100_000_000 for pkg in result["top_packages"])
assert any(
pkg["downloads"] > 100_000_000 for pkg in result["top_packages"]
)
def test_analyze_download_stats(self):
"""Test download statistics analysis."""

View File

@ -1,6 +1,5 @@
"""Tests for semantic version sorting functionality."""
import pytest
from pypi_query_mcp.core.version_utils import sort_versions_semantically
@ -39,7 +38,7 @@ class TestSemanticVersionSorting:
"""Test development and post-release versions."""
versions = ["1.0.0", "1.0.0.post1", "1.0.0.dev0", "1.0.1"]
result = sort_versions_semantically(versions, reverse=True)
# 1.0.1 should be first, then 1.0.0.post1, then 1.0.0, then 1.0.0.dev0
assert result[0] == "1.0.1"
assert result[1] == "1.0.0.post1"
@ -50,7 +49,7 @@ class TestSemanticVersionSorting:
"""Test that invalid versions fall back to string sorting."""
versions = ["1.0.0", "invalid-version", "another-invalid", "2.0.0"]
result = sort_versions_semantically(versions, reverse=True)
# Valid versions should come first
assert result[0] == "2.0.0"
assert result[1] == "1.0.0"
@ -79,9 +78,9 @@ class TestSemanticVersionSorting:
"""Test sorting with mixed version formats."""
versions = ["1.0", "1.0.0", "1.0.1", "v1.0.2"] # v1.0.2 might be invalid
result = sort_versions_semantically(versions, reverse=True)
# Should handle mixed formats gracefully
assert len(result) == 4
assert "1.0.1" in result
assert "1.0.0" in result
assert "1.0" in result
assert "1.0" in result