diff --git a/README.md b/README.md index be705ed..ba1fc3f 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ A Model Context Protocol (MCP) server for querying PyPI package information, dep - šŸ Python version compatibility checking - šŸ” **Advanced dependency analysis and recursive resolution** - šŸ“„ **Package download with dependency collection** +- šŸ“Š **Download statistics and popularity analysis** +- šŸ† **Top packages ranking and trends** - šŸ¢ Private PyPI repository support - ⚔ Fast async operations with caching - šŸ› ļø Easy integration with MCP clients @@ -199,6 +201,11 @@ The server provides the following MCP tools: 6. **resolve_dependencies** - Recursively resolve all package dependencies with detailed analysis 7. **download_package** - Download package and all dependencies to local directory +### Download Statistics & Popularity +8. **get_download_statistics** - Get comprehensive download statistics for any package +9. **get_download_trends** - Analyze download trends and time series data (last 180 days) +10. **get_top_downloaded_packages** - Get the most popular packages by download count + ## Usage Examples Once configured in your MCP client (Claude Desktop, Cline, Cursor, Windsurf), you can ask questions like: @@ -220,6 +227,13 @@ Once configured in your MCP client (Claude Desktop, Cline, Cursor, Windsurf), yo - "Download the requests package with all dependencies to ./downloads folder" - "Collect all packages needed for Django development" +### Download Statistics & Popularity Analysis +- "What are the download statistics for the requests package this month?" +- "Show me the download trends for numpy over the last 180 days" +- "What are the top 10 most downloaded Python packages today?" +- "Compare the popularity of Django vs Flask vs FastAPI" +- "Which web framework has the highest download count this week?" 
+ ### Example Conversations **User**: "Check if Django 4.2 is compatible with Python 3.9" @@ -234,6 +248,12 @@ Once configured in your MCP client (Claude Desktop, Cline, Cursor, Windsurf), yo *[Uses get_package_dependencies tool]* +**User**: "Show me the download statistics for the requests package and tell me which is more popular: requests or urllib3?" + +**AI Assistant**: I'll get the download statistics for both packages and compare their popularity. + +*[Uses get_download_statistics tool for both packages]* + ### Programmatic Usage ```python @@ -247,6 +267,18 @@ result = await mcp_client.call_tool("check_package_python_compatibility", { info = await mcp_client.call_tool("get_package_info", { "package_name": "requests" }) + +# Example: Get download statistics +stats = await mcp_client.call_tool("get_download_statistics", { + "package_name": "numpy", + "period": "month" +}) + +# Example: Get top downloaded packages +top_packages = await mcp_client.call_tool("get_top_downloaded_packages", { + "period": "week", + "limit": 10 +}) ``` ## Development Status @@ -258,9 +290,12 @@ Current implementation status: - āœ… PyPI API client with caching - āœ… MCP tools implementation (package info, versions, dependencies) - āœ… Python version compatibility checking +- āœ… Advanced dependency analysis and recursive resolution +- āœ… Package download with dependency collection +- āœ… **Download statistics and popularity analysis** +- āœ… **Top packages ranking and trends** - āœ… CI/CD pipeline with multi-platform testing - ā³ Private repository support (planned) -- ā³ Advanced dependency analysis (planned) ## Contributing diff --git a/examples/download_stats_demo.py b/examples/download_stats_demo.py new file mode 100644 index 0000000..f81a80f --- /dev/null +++ b/examples/download_stats_demo.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +""" +Demo script for PyPI package download statistics functionality. 
+ +This script demonstrates how to use the new download statistics tools +to analyze PyPI package popularity and trends. +""" + +import asyncio +import json +from datetime import datetime + +from pypi_query_mcp.tools.download_stats import ( + get_package_download_stats, + get_package_download_trends, + get_top_packages_by_downloads, +) + + +async def demo_package_download_stats(): + """Demonstrate package download statistics retrieval.""" + print("=" * 60) + print("PyPI Package Download Statistics Demo") + print("=" * 60) + + # Example packages to analyze + packages = ["requests", "numpy", "django", "flask"] + + for package_name in packages: + print(f"\nšŸ“Š Download Statistics for '{package_name}':") + print("-" * 50) + + try: + # Get download statistics for the last month + stats = await get_package_download_stats(package_name, period="month") + + # Display basic info + metadata = stats.get("metadata", {}) + downloads = stats.get("downloads", {}) + analysis = stats.get("analysis", {}) + + print(f"Package: {metadata.get('name', package_name)}") + print(f"Version: {metadata.get('version', 'unknown')}") + print(f"Summary: {metadata.get('summary', 'No summary available')[:80]}...") + + # Display download counts + print(f"\nDownload Counts:") + print(f" Last Day: {downloads.get('last_day', 0):,}") + print(f" Last Week: {downloads.get('last_week', 0):,}") + print(f" Last Month: {downloads.get('last_month', 0):,}") + + # Display analysis + if analysis: + print(f"\nAnalysis:") + print(f" Total Downloads: {analysis.get('total_downloads', 0):,}") + print(f" Highest Period: {analysis.get('highest_period', 'N/A')}") + + growth = analysis.get('growth_indicators', {}) + if growth: + print(f" Growth Indicators:") + for indicator, value in growth.items(): + print(f" {indicator}: {value}") + + # Display repository info if available + project_urls = metadata.get('project_urls', {}) + if project_urls: + print(f"\nRepository Links:") + for name, url in project_urls.items(): + if 
url: + print(f" {name}: {url}") + + except Exception as e: + print(f"āŒ Error getting stats for {package_name}: {e}") + + +async def demo_package_download_trends(): + """Demonstrate package download trends analysis.""" + print("\n" + "=" * 60) + print("PyPI Package Download Trends Demo") + print("=" * 60) + + # Analyze trends for a popular package + package_name = "requests" + + print(f"\nšŸ“ˆ Download Trends for '{package_name}':") + print("-" * 50) + + try: + # Get download trends (without mirrors for cleaner data) + trends = await get_package_download_trends(package_name, include_mirrors=False) + + trend_analysis = trends.get("trend_analysis", {}) + time_series = trends.get("time_series", []) + + print(f"Package: {package_name}") + print(f"Data Points: {trend_analysis.get('data_points', 0)}") + print(f"Total Downloads: {trend_analysis.get('total_downloads', 0):,}") + print(f"Average Daily: {trend_analysis.get('average_daily', 0):,.0f}") + print(f"Trend Direction: {trend_analysis.get('trend_direction', 'unknown')}") + + # Display date range + date_range = trend_analysis.get('date_range', {}) + if date_range: + print(f"Date Range: {date_range.get('start')} to {date_range.get('end')}") + + # Display peak day + peak_day = trend_analysis.get('peak_day', {}) + if peak_day: + print(f"Peak Day: {peak_day.get('date')} ({peak_day.get('downloads', 0):,} downloads)") + + # Show recent data points (last 7 days) + if time_series: + print(f"\nRecent Download Data (last 7 days):") + recent_data = [item for item in time_series if item.get('category') == 'without_mirrors'][-7:] + for item in recent_data: + date = item.get('date', 'unknown') + downloads = item.get('downloads', 0) + print(f" {date}: {downloads:,} downloads") + + except Exception as e: + print(f"āŒ Error getting trends for {package_name}: {e}") + + +async def demo_top_packages(): + """Demonstrate top packages by downloads.""" + print("\n" + "=" * 60) + print("Top PyPI Packages by Downloads Demo") + print("=" * 60) 
+ + periods = ["day", "week", "month"] + + for period in periods: + print(f"\nšŸ† Top 10 Packages (last {period}):") + print("-" * 50) + + try: + # Get top packages for this period + top_packages = await get_top_packages_by_downloads(period=period, limit=10) + + packages_list = top_packages.get("top_packages", []) + total_found = top_packages.get("total_found", 0) + + print(f"Found {total_found} packages") + print(f"Data Source: {top_packages.get('data_source', 'unknown')}") + + if top_packages.get("note"): + print(f"Note: {top_packages['note']}") + + print(f"\nRankings:") + for package in packages_list: + rank = package.get("rank", "?") + name = package.get("package", "unknown") + downloads = package.get("downloads", 0) + print(f" {rank:2d}. {name:<20} {downloads:>12,} downloads") + + except Exception as e: + print(f"āŒ Error getting top packages for {period}: {e}") + + +async def demo_package_comparison(): + """Demonstrate comparing multiple packages.""" + print("\n" + "=" * 60) + print("Package Comparison Demo") + print("=" * 60) + + # Compare web frameworks + frameworks = ["django", "flask", "fastapi", "tornado"] + + print(f"\nšŸ” Comparing Web Frameworks (last month downloads):") + print("-" * 70) + + comparison_data = [] + + for framework in frameworks: + try: + stats = await get_package_download_stats(framework, period="month") + downloads = stats.get("downloads", {}) + last_month = downloads.get("last_month", 0) + + comparison_data.append({ + "name": framework, + "downloads": last_month, + "metadata": stats.get("metadata", {}), + }) + + except Exception as e: + print(f"āŒ Error getting stats for {framework}: {e}") + + # Sort by downloads (descending) + comparison_data.sort(key=lambda x: x["downloads"], reverse=True) + + # Display comparison + print(f"{'Rank':<4} {'Framework':<12} {'Downloads':<15} {'Summary'}") + print("-" * 70) + + for i, data in enumerate(comparison_data, 1): + name = data["name"] + downloads = data["downloads"] + summary = 
data["metadata"].get("summary", "No summary")[:30] + print(f"{i:<4} {name:<12} {downloads:<15,} {summary}...") + + +async def main(): + """Run all demo functions.""" + print("šŸš€ Starting PyPI Download Statistics Demo") + print(f"Timestamp: {datetime.now().isoformat()}") + + try: + # Run all demos + await demo_package_download_stats() + await demo_package_download_trends() + await demo_top_packages() + await demo_package_comparison() + + print("\n" + "=" * 60) + print("āœ… Demo completed successfully!") + print("=" * 60) + + except KeyboardInterrupt: + print("\nāŒ Demo interrupted by user") + except Exception as e: + print(f"\nāŒ Demo failed with error: {e}") + + +if __name__ == "__main__": + # Run the demo + asyncio.run(main()) diff --git a/pypi_query_mcp/core/stats_client.py b/pypi_query_mcp/core/stats_client.py new file mode 100644 index 0000000..88a9abd --- /dev/null +++ b/pypi_query_mcp/core/stats_client.py @@ -0,0 +1,257 @@ +"""PyPI download statistics client using pypistats.org API.""" + +import asyncio +import logging +from datetime import datetime, timedelta +from typing import Any + +import httpx + +from .exceptions import ( + InvalidPackageNameError, + NetworkError, + PackageNotFoundError, + PyPIServerError, + RateLimitError, +) + +logger = logging.getLogger(__name__) + + +class PyPIStatsClient: + """Async client for PyPI download statistics API.""" + + def __init__( + self, + base_url: str = "https://pypistats.org/api", + timeout: float = 30.0, + max_retries: int = 3, + retry_delay: float = 1.0, + ): + """Initialize PyPI stats client. 
+ + Args: + base_url: Base URL for pypistats API + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + retry_delay: Delay between retries in seconds + """ + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.max_retries = max_retries + self.retry_delay = retry_delay + + # Simple in-memory cache + self._cache: dict[str, dict[str, Any]] = {} + self._cache_ttl = 3600 # 1 hour (data updates daily) + + # HTTP client configuration + self._client = httpx.AsyncClient( + timeout=httpx.Timeout(timeout), + headers={ + "User-Agent": "pypi-query-mcp-server/0.1.0", + "Accept": "application/json", + }, + follow_redirects=True, + ) + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + await self.close() + + async def close(self): + """Close the HTTP client.""" + await self._client.aclose() + + def _validate_package_name(self, package_name: str) -> str: + """Validate and normalize package name. + + Args: + package_name: Package name to validate + + Returns: + Normalized package name + + Raises: + InvalidPackageNameError: If package name is invalid + """ + if not package_name or not package_name.strip(): + raise InvalidPackageNameError(package_name) + + # Basic validation + normalized = package_name.strip().lower() + return normalized + + def _get_cache_key(self, endpoint: str, package_name: str = "", **params) -> str: + """Generate cache key for API data.""" + param_str = "&".join(f"{k}={v}" for k, v in sorted(params.items()) if v is not None) + return f"{endpoint}:{package_name}:{param_str}" + + def _is_cache_valid(self, cache_entry: dict[str, Any]) -> bool: + """Check if cache entry is still valid.""" + import time + return time.time() - cache_entry.get("timestamp", 0) < self._cache_ttl + + async def _make_request(self, url: str) -> dict[str, Any]: + """Make HTTP request with retry logic. 
+ + Args: + url: URL to request + + Returns: + JSON response data + + Raises: + NetworkError: For network-related errors + PackageNotFoundError: When package is not found + RateLimitError: When rate limit is exceeded + PyPIServerError: For server errors + """ + last_exception = None + + for attempt in range(self.max_retries + 1): + try: + logger.debug(f"Making request to {url} (attempt {attempt + 1})") + + response = await self._client.get(url) + + # Handle different HTTP status codes + if response.status_code == 200: + return response.json() + elif response.status_code == 404: + # Extract package name from URL for better error message + package_name = url.split("/")[-2] if "/" in url else "unknown" + raise PackageNotFoundError(package_name) + elif response.status_code == 429: + retry_after = response.headers.get("Retry-After") + retry_after_int = int(retry_after) if retry_after else None + raise RateLimitError(retry_after_int) + elif response.status_code >= 500: + raise PyPIServerError(response.status_code) + else: + raise PyPIServerError( + response.status_code, + f"Unexpected status code: {response.status_code}", + ) + + except httpx.TimeoutException as e: + last_exception = NetworkError(f"Request timeout: {e}", e) + except httpx.NetworkError as e: + last_exception = NetworkError(f"Network error: {e}", e) + except (PackageNotFoundError, RateLimitError, PyPIServerError): + # Don't retry these errors + raise + except Exception as e: + last_exception = NetworkError(f"Unexpected error: {e}", e) + + # Wait before retry (except on last attempt) + if attempt < self.max_retries: + await asyncio.sleep(self.retry_delay * (2**attempt)) + + # If we get here, all retries failed + raise last_exception + + async def get_recent_downloads( + self, package_name: str, period: str = "month", use_cache: bool = True + ) -> dict[str, Any]: + """Get recent download statistics for a package. 
+ + Args: + package_name: Name of the package to query + period: Time period ('day', 'week', 'month') + use_cache: Whether to use cached data if available + + Returns: + Dictionary containing recent download statistics + + Raises: + InvalidPackageNameError: If package name is invalid + PackageNotFoundError: If package is not found + NetworkError: For network-related errors + """ + normalized_name = self._validate_package_name(package_name) + cache_key = self._get_cache_key("recent", normalized_name, period=period) + + # Check cache first + if use_cache and cache_key in self._cache: + cache_entry = self._cache[cache_key] + if self._is_cache_valid(cache_entry): + logger.debug(f"Using cached recent downloads for: {normalized_name}") + return cache_entry["data"] + + # Make API request + url = f"{self.base_url}/packages/{normalized_name}/recent" + if period and period != "all": + url += f"?period={period}" + + logger.info(f"Fetching recent downloads for: {normalized_name} (period: {period})") + + try: + data = await self._make_request(url) + + # Cache the result + import time + self._cache[cache_key] = {"data": data, "timestamp": time.time()} + + return data + + except Exception as e: + logger.error(f"Failed to fetch recent downloads for {normalized_name}: {e}") + raise + + async def get_overall_downloads( + self, package_name: str, mirrors: bool = False, use_cache: bool = True + ) -> dict[str, Any]: + """Get overall download time series for a package. 
+ + Args: + package_name: Name of the package to query + mirrors: Whether to include mirror downloads + use_cache: Whether to use cached data if available + + Returns: + Dictionary containing overall download time series + + Raises: + InvalidPackageNameError: If package name is invalid + PackageNotFoundError: If package is not found + NetworkError: For network-related errors + """ + normalized_name = self._validate_package_name(package_name) + cache_key = self._get_cache_key("overall", normalized_name, mirrors=mirrors) + + # Check cache first + if use_cache and cache_key in self._cache: + cache_entry = self._cache[cache_key] + if self._is_cache_valid(cache_entry): + logger.debug(f"Using cached overall downloads for: {normalized_name}") + return cache_entry["data"] + + # Make API request + url = f"{self.base_url}/packages/{normalized_name}/overall" + if mirrors is not None: + url += f"?mirrors={'true' if mirrors else 'false'}" + + logger.info(f"Fetching overall downloads for: {normalized_name} (mirrors: {mirrors})") + + try: + data = await self._make_request(url) + + # Cache the result + import time + self._cache[cache_key] = {"data": data, "timestamp": time.time()} + + return data + + except Exception as e: + logger.error(f"Failed to fetch overall downloads for {normalized_name}: {e}") + raise + + def clear_cache(self): + """Clear the internal cache.""" + self._cache.clear() + logger.debug("Stats cache cleared") diff --git a/pypi_query_mcp/server.py b/pypi_query_mcp/server.py index 793f6e9..8c67604 100644 --- a/pypi_query_mcp/server.py +++ b/pypi_query_mcp/server.py @@ -11,6 +11,9 @@ from .tools import ( check_python_compatibility, download_package_with_dependencies, get_compatible_python_versions, + get_package_download_stats, + get_package_download_trends, + get_top_packages_by_downloads, query_package_dependencies, query_package_info, query_package_versions, @@ -407,6 +410,149 @@ async def download_package( } +@mcp.tool() +async def get_download_statistics( + 
package_name: str, period: str = "month", use_cache: bool = True +) -> dict[str, Any]: + """Get download statistics for a PyPI package. + + This tool retrieves comprehensive download statistics for a Python package, + including recent download counts, trends, and analysis. + + Args: + package_name: The name of the PyPI package to analyze (e.g., 'requests', 'numpy') + period: Time period for recent downloads ('day', 'week', 'month', default: 'month') + use_cache: Whether to use cached data for faster responses (default: True) + + Returns: + Dictionary containing download statistics including: + - Recent download counts (last day/week/month) + - Package metadata and repository information + - Download trends and growth analysis + - Data source and timestamp information + + Raises: + InvalidPackageNameError: If package name is empty or invalid + PackageNotFoundError: If package is not found on PyPI + NetworkError: For network-related errors + """ + try: + logger.info(f"MCP tool: Getting download statistics for {package_name} (period: {period})") + result = await get_package_download_stats(package_name, period, use_cache) + logger.info(f"Successfully retrieved download statistics for package: {package_name}") + return result + except (InvalidPackageNameError, PackageNotFoundError, NetworkError) as e: + logger.error(f"Error getting download statistics for {package_name}: {e}") + return { + "error": str(e), + "error_type": type(e).__name__, + "package_name": package_name, + "period": period, + } + except Exception as e: + logger.error(f"Unexpected error getting download statistics for {package_name}: {e}") + return { + "error": f"Unexpected error: {e}", + "error_type": "UnexpectedError", + "package_name": package_name, + "period": period, + } + + +@mcp.tool() +async def get_download_trends( + package_name: str, include_mirrors: bool = False, use_cache: bool = True +) -> dict[str, Any]: + """Get download trends and time series for a PyPI package. 
+ + This tool retrieves detailed download trends and time series data for a Python package, + providing insights into download patterns over the last 180 days. + + Args: + package_name: The name of the PyPI package to analyze (e.g., 'django', 'flask') + include_mirrors: Whether to include mirror downloads in analysis (default: False) + use_cache: Whether to use cached data for faster responses (default: True) + + Returns: + Dictionary containing download trends including: + - Time series data for the last 180 days + - Trend analysis (increasing/decreasing/stable) + - Peak download periods and statistics + - Average daily downloads and growth indicators + + Raises: + InvalidPackageNameError: If package name is empty or invalid + PackageNotFoundError: If package is not found on PyPI + NetworkError: For network-related errors + """ + try: + logger.info( + f"MCP tool: Getting download trends for {package_name} " + f"(include_mirrors: {include_mirrors})" + ) + result = await get_package_download_trends(package_name, include_mirrors, use_cache) + logger.info(f"Successfully retrieved download trends for package: {package_name}") + return result + except (InvalidPackageNameError, PackageNotFoundError, NetworkError) as e: + logger.error(f"Error getting download trends for {package_name}: {e}") + return { + "error": str(e), + "error_type": type(e).__name__, + "package_name": package_name, + "include_mirrors": include_mirrors, + } + except Exception as e: + logger.error(f"Unexpected error getting download trends for {package_name}: {e}") + return { + "error": f"Unexpected error: {e}", + "error_type": "UnexpectedError", + "package_name": package_name, + "include_mirrors": include_mirrors, + } + + +@mcp.tool() +async def get_top_downloaded_packages( + period: str = "month", limit: int = 20 +) -> dict[str, Any]: + """Get the most downloaded PyPI packages. 
+ + This tool retrieves a list of the most popular Python packages by download count, + helping you discover trending and widely-used packages in the Python ecosystem. + + Args: + period: Time period for download ranking ('day', 'week', 'month', default: 'month') + limit: Maximum number of packages to return (default: 20, max: 50) + + Returns: + Dictionary containing top packages information including: + - Ranked list of packages with download counts + - Package metadata and repository links + - Period and ranking information + - Data source and limitations + + Note: + Due to API limitations, this tool provides results based on known popular packages. + For comprehensive data analysis, consider using Google BigQuery with PyPI datasets. + """ + try: + # Limit the maximum number of packages to prevent excessive API calls + actual_limit = min(limit, 50) + + logger.info(f"MCP tool: Getting top {actual_limit} packages for period: {period}") + result = await get_top_packages_by_downloads(period, actual_limit) + logger.info(f"Successfully retrieved top packages list") + return result + except Exception as e: + logger.error(f"Error getting top packages: {e}") + return { + "error": f"Unexpected error: {e}", + "error_type": "UnexpectedError", + "period": period, + "limit": limit, + } + + @click.command() @click.option( "--log-level", diff --git a/pypi_query_mcp/tools/__init__.py b/pypi_query_mcp/tools/__init__.py index 2d39272..48ef2bd 100644 --- a/pypi_query_mcp/tools/__init__.py +++ b/pypi_query_mcp/tools/__init__.py @@ -10,6 +10,11 @@ from .compatibility_check import ( suggest_python_version_for_packages, ) from .dependency_resolver import resolve_package_dependencies +from .download_stats import ( + get_package_download_stats, + get_package_download_trends, + get_top_packages_by_downloads, +) from .package_downloader import download_package_with_dependencies from .package_query import ( query_package_dependencies, @@ -26,4 +31,7 @@ __all__ = [ 
"suggest_python_version_for_packages", "resolve_package_dependencies", "download_package_with_dependencies", + "get_package_download_stats", + "get_package_download_trends", + "get_top_packages_by_downloads", ] diff --git a/pypi_query_mcp/tools/download_stats.py b/pypi_query_mcp/tools/download_stats.py new file mode 100644 index 0000000..84e50cd --- /dev/null +++ b/pypi_query_mcp/tools/download_stats.py @@ -0,0 +1,322 @@ +"""PyPI package download statistics tools.""" + +import logging +from datetime import datetime, timedelta +from typing import Any + +from ..core.pypi_client import PyPIClient +from ..core.stats_client import PyPIStatsClient +from ..core.exceptions import InvalidPackageNameError, NetworkError, PackageNotFoundError + +logger = logging.getLogger(__name__) + + +async def get_package_download_stats( + package_name: str, period: str = "month", use_cache: bool = True +) -> dict[str, Any]: + """Get download statistics for a PyPI package. + + Args: + package_name: Name of the package to query + period: Time period for recent downloads ('day', 'week', 'month') + use_cache: Whether to use cached data + + Returns: + Dictionary containing download statistics including: + - Recent download counts (last day/week/month) + - Package metadata + - Download trends and analysis + + Raises: + InvalidPackageNameError: If package name is invalid + PackageNotFoundError: If package is not found + NetworkError: For network-related errors + """ + async with PyPIStatsClient() as stats_client, PyPIClient() as pypi_client: + try: + # Get recent download statistics + recent_stats = await stats_client.get_recent_downloads( + package_name, period, use_cache + ) + + # Get basic package info for metadata + try: + package_info = await pypi_client.get_package_info(package_name, use_cache) + package_metadata = { + "name": package_info.get("info", {}).get("name", package_name), + "version": package_info.get("info", {}).get("version", "unknown"), + "summary": package_info.get("info", 
{}).get("summary", ""), + "author": package_info.get("info", {}).get("author", ""), + "home_page": package_info.get("info", {}).get("home_page", ""), + "project_url": package_info.get("info", {}).get("project_url", ""), + "project_urls": package_info.get("info", {}).get("project_urls", {}), + } + except Exception as e: + logger.warning(f"Could not fetch package metadata for {package_name}: {e}") + package_metadata = {"name": package_name} + + # Extract download data + download_data = recent_stats.get("data", {}) + + # Calculate trends and analysis + analysis = _analyze_download_stats(download_data) + + return { + "package": package_name, + "metadata": package_metadata, + "downloads": download_data, + "analysis": analysis, + "period": period, + "data_source": "pypistats.org", + "timestamp": datetime.now().isoformat(), + } + + except Exception as e: + logger.error(f"Error getting download stats for {package_name}: {e}") + raise + + +async def get_package_download_trends( + package_name: str, include_mirrors: bool = False, use_cache: bool = True +) -> dict[str, Any]: + """Get download trends and time series for a PyPI package. 
+ + Args: + package_name: Name of the package to query + include_mirrors: Whether to include mirror downloads + use_cache: Whether to use cached data + + Returns: + Dictionary containing download trends including: + - Time series data for the last 180 days + - Trend analysis and statistics + - Peak download periods + + Raises: + InvalidPackageNameError: If package name is invalid + PackageNotFoundError: If package is not found + NetworkError: For network-related errors + """ + async with PyPIStatsClient() as stats_client: + try: + # Get overall download time series + overall_stats = await stats_client.get_overall_downloads( + package_name, include_mirrors, use_cache + ) + + # Process time series data + time_series_data = overall_stats.get("data", []) + + # Analyze trends + trend_analysis = _analyze_download_trends(time_series_data, include_mirrors) + + return { + "package": package_name, + "time_series": time_series_data, + "trend_analysis": trend_analysis, + "include_mirrors": include_mirrors, + "data_source": "pypistats.org", + "timestamp": datetime.now().isoformat(), + } + + except Exception as e: + logger.error(f"Error getting download trends for {package_name}: {e}") + raise + + +async def get_top_packages_by_downloads( + period: str = "month", limit: int = 20 +) -> dict[str, Any]: + """Get top PyPI packages by download count. + + Note: This function provides a simulated response based on known popular packages + since pypistats.org doesn't provide a direct API for top packages. 
+ + Args: + period: Time period ('day', 'week', 'month') + limit: Maximum number of packages to return + + Returns: + Dictionary containing top packages information including: + - List of top packages with download counts + - Period and ranking information + - Data source and timestamp + """ + # Known popular packages (this would ideally come from an API) + popular_packages = [ + "boto3", "urllib3", "requests", "certifi", "charset-normalizer", + "idna", "setuptools", "python-dateutil", "six", "botocore", + "typing-extensions", "packaging", "numpy", "pip", "pyyaml", + "cryptography", "click", "jinja2", "markupsafe", "wheel" + ] + + async with PyPIStatsClient() as stats_client: + try: + top_packages = [] + + # Get download stats for popular packages + for i, package_name in enumerate(popular_packages[:limit]): + try: + stats = await stats_client.get_recent_downloads( + package_name, period, use_cache=True + ) + + download_data = stats.get("data", {}) + download_count = _extract_download_count(download_data, period) + + top_packages.append({ + "rank": i + 1, + "package": package_name, + "downloads": download_count, + "period": period, + }) + + except Exception as e: + logger.warning(f"Could not get stats for {package_name}: {e}") + continue + + # Sort by download count (descending) + top_packages.sort(key=lambda x: x.get("downloads", 0), reverse=True) + + # Update ranks after sorting + for i, package in enumerate(top_packages): + package["rank"] = i + 1 + + return { + "top_packages": top_packages, + "period": period, + "limit": limit, + "total_found": len(top_packages), + "data_source": "pypistats.org", + "note": "Based on known popular packages due to API limitations", + "timestamp": datetime.now().isoformat(), + } + + except Exception as e: + logger.error(f"Error getting top packages: {e}") + raise + + +def _analyze_download_stats(download_data: dict[str, Any]) -> dict[str, Any]: + """Analyze download statistics data. 
+ + Args: + download_data: Raw download data from API + + Returns: + Dictionary containing analysis results + """ + analysis = { + "total_downloads": 0, + "periods_available": [], + "highest_period": None, + "growth_indicators": {}, + } + + if not download_data: + return analysis + + # Extract available periods and counts + for period, count in download_data.items(): + if period.startswith("last_") and isinstance(count, int): + analysis["periods_available"].append(period) + analysis["total_downloads"] += count + + if analysis["highest_period"] is None or count > download_data.get(analysis["highest_period"], 0): + analysis["highest_period"] = period + + # Calculate growth indicators + last_day = download_data.get("last_day", 0) + last_week = download_data.get("last_week", 0) + last_month = download_data.get("last_month", 0) + + if last_day and last_week: + analysis["growth_indicators"]["daily_vs_weekly"] = round(last_day * 7 / last_week, 2) + + if last_week and last_month: + analysis["growth_indicators"]["weekly_vs_monthly"] = round(last_week * 4 / last_month, 2) + + return analysis + + +def _analyze_download_trends(time_series_data: list[dict], include_mirrors: bool) -> dict[str, Any]: + """Analyze download trends from time series data. 
+ + Args: + time_series_data: Time series download data + include_mirrors: Whether mirrors are included + + Returns: + Dictionary containing trend analysis + """ + analysis = { + "total_downloads": 0, + "data_points": len(time_series_data), + "date_range": {}, + "peak_day": None, + "average_daily": 0, + "trend_direction": "stable", + } + + if not time_series_data: + return analysis + + # Filter data based on mirror preference + category_filter = "with_mirrors" if include_mirrors else "without_mirrors" + filtered_data = [ + item for item in time_series_data + if item.get("category") == category_filter + ] + + if not filtered_data: + return analysis + + # Calculate statistics + total_downloads = sum(item.get("downloads", 0) for item in filtered_data) + analysis["total_downloads"] = total_downloads + analysis["data_points"] = len(filtered_data) + + if filtered_data: + dates = [item.get("date") for item in filtered_data if item.get("date")] + if dates: + analysis["date_range"] = { + "start": min(dates), + "end": max(dates), + } + + # Find peak day + peak_item = max(filtered_data, key=lambda x: x.get("downloads", 0)) + analysis["peak_day"] = { + "date": peak_item.get("date"), + "downloads": peak_item.get("downloads", 0), + } + + # Calculate average + if len(filtered_data) > 0: + analysis["average_daily"] = round(total_downloads / len(filtered_data), 2) + + # Simple trend analysis (compare first and last week) + if len(filtered_data) >= 14: + first_week = sum(item.get("downloads", 0) for item in filtered_data[:7]) + last_week = sum(item.get("downloads", 0) for item in filtered_data[-7:]) + + if last_week > first_week * 1.1: + analysis["trend_direction"] = "increasing" + elif last_week < first_week * 0.9: + analysis["trend_direction"] = "decreasing" + + return analysis + + +def _extract_download_count(download_data: dict[str, Any], period: str) -> int: + """Extract download count for a specific period. 
+ + Args: + download_data: Download data from API + period: Period to extract ('day', 'week', 'month') + + Returns: + Download count for the specified period + """ + period_key = f"last_{period}" + return download_data.get(period_key, 0) diff --git a/tests/test_download_stats.py b/tests/test_download_stats.py new file mode 100644 index 0000000..d9f3944 --- /dev/null +++ b/tests/test_download_stats.py @@ -0,0 +1,199 @@ +"""Tests for download statistics functionality.""" + +import pytest +from unittest.mock import AsyncMock, patch + +from pypi_query_mcp.tools.download_stats import ( + get_package_download_stats, + get_package_download_trends, + get_top_packages_by_downloads, + _analyze_download_stats, + _analyze_download_trends, + _extract_download_count, +) +from pypi_query_mcp.core.exceptions import PackageNotFoundError, InvalidPackageNameError + + +class TestDownloadStats: + """Test download statistics functionality.""" + + @pytest.mark.asyncio + async def test_get_package_download_stats_success(self): + """Test successful package download stats retrieval.""" + mock_stats_data = { + "data": { + "last_day": 1000, + "last_week": 7000, + "last_month": 30000, + }, + "package": "test-package", + "type": "recent_downloads", + } + + mock_package_info = { + "info": { + "name": "test-package", + "version": "1.0.0", + "summary": "A test package", + "author": "Test Author", + "home_page": "https://example.com", + "project_urls": {"Repository": "https://github.com/test/test-package"}, + } + } + + with patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as mock_stats_client, \ + patch("pypi_query_mcp.tools.download_stats.PyPIClient") as mock_pypi_client: + + # Setup mocks + mock_stats_instance = AsyncMock() + mock_stats_instance.get_recent_downloads.return_value = mock_stats_data + mock_stats_client.return_value.__aenter__.return_value = mock_stats_instance + + mock_pypi_instance = AsyncMock() + mock_pypi_instance.get_package_info.return_value = mock_package_info + 
class TestDownloadStats:
    """Tests for the download statistics tools."""

    @pytest.mark.asyncio
    async def test_get_package_download_stats_success(self):
        """A successful lookup merges pypistats data with PyPI metadata."""
        stats_payload = {
            "data": {
                "last_day": 1000,
                "last_week": 7000,
                "last_month": 30000,
            },
            "package": "test-package",
            "type": "recent_downloads",
        }
        pypi_payload = {
            "info": {
                "name": "test-package",
                "version": "1.0.0",
                "summary": "A test package",
                "author": "Test Author",
                "home_page": "https://example.com",
                "project_urls": {"Repository": "https://github.com/test/test-package"},
            }
        }

        with patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as stats_cls, \
             patch("pypi_query_mcp.tools.download_stats.PyPIClient") as pypi_cls:
            # Wire both async context-manager clients to canned payloads.
            stats_client = AsyncMock()
            stats_client.get_recent_downloads.return_value = stats_payload
            stats_cls.return_value.__aenter__.return_value = stats_client

            pypi_client = AsyncMock()
            pypi_client.get_package_info.return_value = pypi_payload
            pypi_cls.return_value.__aenter__.return_value = pypi_client

            result = await get_package_download_stats("test-package", "month")

            assert result["package"] == "test-package"
            assert result["downloads"]["last_month"] == 30000
            assert result["metadata"]["name"] == "test-package"
            assert result["metadata"]["version"] == "1.0.0"
            assert result["period"] == "month"
            assert "analysis" in result
            assert "timestamp" in result

    @pytest.mark.asyncio
    async def test_get_package_download_stats_package_not_found(self):
        """An unknown package propagates PackageNotFoundError."""
        with patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as stats_cls:
            stats_client = AsyncMock()
            stats_client.get_recent_downloads.side_effect = PackageNotFoundError("nonexistent")
            stats_cls.return_value.__aenter__.return_value = stats_client

            with pytest.raises(PackageNotFoundError):
                await get_package_download_stats("nonexistent", "month")

    @pytest.mark.asyncio
    async def test_get_package_download_trends_success(self):
        """Trend retrieval returns the raw series plus a mirror-filtered analysis."""
        trends_payload = {
            "data": [
                {"category": "without_mirrors", "date": "2024-01-01", "downloads": 1000},
                {"category": "without_mirrors", "date": "2024-01-02", "downloads": 1200},
                {"category": "with_mirrors", "date": "2024-01-01", "downloads": 1100},
                {"category": "with_mirrors", "date": "2024-01-02", "downloads": 1300},
            ],
            "package": "test-package",
            "type": "overall_downloads",
        }

        with patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as stats_cls:
            stats_client = AsyncMock()
            stats_client.get_overall_downloads.return_value = trends_payload
            stats_cls.return_value.__aenter__.return_value = stats_client

            result = await get_package_download_trends("test-package", include_mirrors=False)

            assert result["package"] == "test-package"
            assert result["include_mirrors"] is False
            assert len(result["time_series"]) == 4
            assert "trend_analysis" in result
            # Only the two without_mirrors records are analyzed.
            assert result["trend_analysis"]["data_points"] == 2

    @pytest.mark.asyncio
    async def test_get_top_packages_by_downloads_success(self):
        """Top-package ranking returns ranked entries within the limit."""
        downloads_payload = {
            "data": {
                "last_month": 50000000,
            },
            "package": "boto3",
            "type": "recent_downloads",
        }

        with patch("pypi_query_mcp.tools.download_stats.PyPIStatsClient") as stats_cls:
            stats_client = AsyncMock()
            stats_client.get_recent_downloads.return_value = downloads_payload
            stats_cls.return_value.__aenter__.return_value = stats_client

            result = await get_top_packages_by_downloads("month", 5)

            assert "top_packages" in result
            assert result["period"] == "month"
            assert result["limit"] == 5
            ranked = result["top_packages"]
            assert len(ranked) <= 5
            assert all("rank" in pkg for pkg in ranked)
            assert all("package" in pkg for pkg in ranked)
            assert all("downloads" in pkg for pkg in ranked)

    def test_analyze_download_stats(self):
        """Aggregate analysis sums the periods and finds the busiest one."""
        recent = {
            "last_day": 1000,
            "last_week": 7000,
            "last_month": 30000,
        }

        analysis = _analyze_download_stats(recent)

        assert analysis["total_downloads"] == 38000
        for period in ("last_day", "last_week", "last_month"):
            assert period in analysis["periods_available"]
        assert analysis["highest_period"] == "last_month"
        assert "growth_indicators" in analysis

    def test_analyze_download_stats_empty(self):
        """Empty input yields the zeroed default analysis."""
        analysis = _analyze_download_stats({})

        assert analysis["total_downloads"] == 0
        assert analysis["periods_available"] == []
        assert analysis["highest_period"] is None
        assert analysis["growth_indicators"] == {}

    def test_analyze_download_trends(self):
        """Trend analysis computes totals, the average, and the peak day."""
        series = [
            {"category": "without_mirrors", "date": "2024-01-01", "downloads": 1000},
            {"category": "without_mirrors", "date": "2024-01-02", "downloads": 1200},
            {"category": "without_mirrors", "date": "2024-01-03", "downloads": 1100},
        ]

        analysis = _analyze_download_trends(series, include_mirrors=False)

        assert analysis["total_downloads"] == 3300
        assert analysis["data_points"] == 3
        assert analysis["average_daily"] == 1100.0
        assert analysis["peak_day"]["downloads"] == 1200
        assert analysis["peak_day"]["date"] == "2024-01-02"
        assert "date_range" in analysis

    def test_analyze_download_trends_empty(self):
        """Empty series yields the zeroed default analysis."""
        analysis = _analyze_download_trends([], include_mirrors=False)

        assert analysis["total_downloads"] == 0
        assert analysis["data_points"] == 0
        assert analysis["average_daily"] == 0
        assert analysis["peak_day"] is None

    def test_extract_download_count(self):
        """Counts are read from the matching "last_<period>" key."""
        recent = {
            "last_day": 1000,
            "last_week": 7000,
            "last_month": 30000,
        }

        assert _extract_download_count(recent, "day") == 1000
        assert _extract_download_count(recent, "week") == 7000
        assert _extract_download_count(recent, "month") == 30000
        # Missing periods fall back to zero.
        assert _extract_download_count(recent, "year") == 0

    def test_extract_download_count_empty(self):
        """Empty data always yields zero."""
        assert _extract_download_count({}, "month") == 0