- Implemented PyPISearchClient with semantic search, filtering, and sorting - Added 4 new search tools: search_packages, search_by_category, find_alternatives, get_trending_packages - Created SearchFilter and SearchSort classes for flexible configuration - Added SearchError exception for search-specific error handling - Comprehensive test suite with 13 tests covering all search functionality - Enhanced MCP server with 4 new search endpoints - Support for filtering by Python version, license, category, downloads, maintenance status - Multiple sorting options: relevance, popularity, quality, recency, name, downloads - Semantic search using description similarity scoring - Category-based package discovery with intelligent keyword matching - Package alternatives finder using metadata analysis - Trending packages analysis with download activity tracking - Robust fallback mechanisms using curated package database - All tests passing (13/13) This implements feature #6 from the roadmap: "Advanced PyPI Search with filtering by Python version, license, maintenance status and sorting by popularity, recency, quality score with semantic search capabilities"
309 lines
12 KiB
Python
309 lines
12 KiB
Python
"""PyPI search tools with advanced filtering and sorting capabilities."""
|
|
|
|
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from ..core.exceptions import InvalidPackageNameError, SearchError
|
|
from ..core.search_client import PyPISearchClient, SearchFilter, SearchSort
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def search_packages(
    query: str,
    limit: int = 20,
    python_versions: Optional[List[str]] = None,
    licenses: Optional[List[str]] = None,
    categories: Optional[List[str]] = None,
    min_downloads: Optional[int] = None,
    maintenance_status: Optional[str] = None,
    has_wheels: Optional[bool] = None,
    sort_by: str = "relevance",
    sort_desc: bool = True,
    semantic_search: bool = False,
) -> Dict[str, Any]:
    """
    Run a filtered, sorted package search through ``PyPISearchClient``.

    The filter arguments are bundled into a ``SearchFilter`` and the sort
    arguments into a ``SearchSort``; both are handed to the client together
    with the query.

    Args:
        query: Search query string; must contain non-whitespace characters
        limit: Maximum number of results to return. Values outside 1..100
            are replaced by 20.
        python_versions: Python versions to filter by (e.g., ["3.9", "3.10"])
        licenses: License types to filter by (e.g., ["mit", "apache", "bsd"])
        categories: Categories to filter by (e.g., ["web", "data-science"])
        min_downloads: Minimum monthly downloads threshold
        maintenance_status: Maintenance status filter
            ("active", "maintained", "stale", "abandoned")
        has_wheels: Filter on presence/absence of wheel distributions
        sort_by: Sort field
            ("relevance", "popularity", "recency", "quality", "name", "downloads")
        sort_desc: Sort in descending order (default: True)
        semantic_search: Use semantic search on package descriptions (default: False)

    Returns:
        Whatever the search client returns for the query (a dictionary of
        results and metadata).

    Raises:
        InvalidPackageNameError: If the query is empty or only whitespace
        SearchError: If the search fails; any non-SearchError exception raised
            while building the filters or searching is wrapped in SearchError
    """
    # Validation happens before the try block so this error is never wrapped.
    if not (query and query.strip()):
        raise InvalidPackageNameError("Search query cannot be empty")

    # Out-of-range limits fall back to the default rather than being clamped.
    limit = 20 if (limit <= 0 or limit > 100) else limit

    logger.info(f"Searching PyPI: '{query}' (limit: {limit}, sort: {sort_by})")

    try:
        filter_options = {
            "python_versions": python_versions,
            "licenses": licenses,
            "categories": categories,
            "min_downloads": min_downloads,
            "maintenance_status": maintenance_status,
            "has_wheels": has_wheels,
        }
        search_filters = SearchFilter(**filter_options)
        ordering = SearchSort(field=sort_by, reverse=sort_desc)

        async with PyPISearchClient() as search_client:
            outcome = await search_client.search_packages(
                query=query,
                limit=limit,
                filters=search_filters,
                sort=ordering,
                semantic_search=semantic_search,
            )

        # Returned only after the client context has been exited.
        return outcome
    except SearchError:
        # Already the documented error type: propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Unexpected error during search: {e}")
        raise SearchError(f"Search failed: {e}") from e
|
|
|
|
|
|
async def search_by_category(
    category: str,
    limit: int = 20,
    sort_by: str = "popularity",
    python_version: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Search for packages belonging to a category.

    A recognised category (matched case-insensitively) is expanded into a
    multi-keyword query; any other value is used verbatim as the query. The
    lower-cased category is also passed as a category filter, and semantic
    search is always enabled. The actual search is delegated to
    ``search_packages``.

    Args:
        category: Category to search for (e.g., "web", "data-science", "testing")
        limit: Maximum number of results to return
        sort_by: Sort field (default: "popularity")
        python_version: Optional Python version to filter by

    Returns:
        The result dictionary produced by ``search_packages``
    """
    logger.info(f"Searching category: '{category}' (limit: {limit})")

    # Keyword expansions for the categories this tool knows about.
    expanded_queries = {
        "web": "web framework flask django fastapi",
        "data-science": "data science machine learning pandas numpy",
        "database": "database sql orm sqlite postgres mysql",
        "testing": "testing pytest unittest mock coverage",
        "cli": "command line interface cli argparse click",
        "security": "security encryption crypto ssl authentication",
        "networking": "network http requests urllib socket",
        "dev-tools": "development tools build package deploy",
        "cloud": "cloud aws azure gcp docker kubernetes",
        "gui": "gui interface tkinter qt desktop",
    }

    normalized = category.lower()

    # Unknown categories are searched for under their own (original-case) name.
    if normalized in expanded_queries:
        query_text = expanded_queries[normalized]
    else:
        query_text = category

    if python_version:
        version_filter = [python_version]
    else:
        version_filter = None

    return await search_packages(
        query=query_text,
        limit=limit,
        categories=[normalized],
        python_versions=version_filter,
        sort_by=sort_by,
        semantic_search=True,
    )
|
|
|
|
|
|
async def find_alternatives(
    package_name: str,
    limit: int = 10,
    include_similar: bool = True,
) -> Dict[str, Any]:
    """
    Find alternative packages to a given package.

    The target package's metadata is fetched, a search query is built from its
    keywords and the first few longer words of its summary, and that query is
    run through ``search_packages`` sorted by popularity. The target package
    itself is removed from the results.

    Args:
        package_name: Name of the package to find alternatives for
        limit: Maximum number of alternatives to return
        include_similar: Include packages with similar functionality
            (forwarded as the ``semantic_search`` flag)

    Returns:
        Dictionary with keys ``target_package`` (name, summary, keywords,
        topic categories), ``alternatives``, ``search_query_used``,
        ``total_alternatives``, ``analysis`` and ``timestamp``

    Raises:
        SearchError: If the metadata lookup or the search fails for any reason
    """
    # Function-scope import, like the project imports below, so this block
    # does not depend on the module's top-level import list.
    import re

    def _canonical(name: str) -> str:
        # PEP 503 normalisation: runs of "-", "_" and "." are equivalent and
        # names are case-insensitive, so "Flask_SQLAlchemy" and
        # "flask-sqlalchemy" denote the same project.
        return re.sub(r"[-_.]+", "-", name).lower()

    logger.info(f"Finding alternatives for: '{package_name}'")

    try:
        # First, get information about the target package
        from ..core.pypi_client import PyPIClient

        async with PyPIClient() as client:
            package_data = await client.get_package_info(package_name)

        info = package_data["info"]
        keywords = info.get("keywords", "")
        summary = info.get("summary", "")
        # The key may be present with a null value; treat that as "none".
        classifiers = info.get("classifiers") or []

        # Topic classifiers ("Topic :: A :: B") contribute their last segment.
        category_terms = [
            classifier.split("Topic ::")[-1].strip().lower()
            for classifier in classifiers
            if "Topic ::" in classifier
        ]

        # Create search query from package metadata
        search_terms: List[str] = []
        if keywords:
            # Keywords are published either comma- or space-separated; split
            # on both so "http,client,rest" yields three usable terms instead
            # of one blob. Space-only input splits exactly as before.
            search_terms.extend(
                term for term in re.split(r"[,\s]+", keywords) if term
            )
        if summary:
            # Words longer than three characters, first five only.
            summary_words = [w for w in summary.lower().split() if len(w) > 3]
            search_terms.extend(summary_words[:5])

        # Limit to the leading terms; fall back to the package name when the
        # metadata yields nothing to search for.
        search_query = " ".join(search_terms[:8]) or package_name

        # Ask for a few extra results so that dropping the target package
        # still leaves `limit` entries. search_packages replaces any limit
        # above 100 with 20, so the padded value is capped at 100.
        results = await search_packages(
            query=search_query,
            limit=min(limit + 5, 100),
            sort_by="popularity",
            semantic_search=include_similar,
        )

        # Filter out the original package, comparing canonical names so that
        # alternate spellings of the same project are also excluded.
        target = _canonical(package_name)
        alternatives = [
            pkg for pkg in results["packages"] if _canonical(pkg["name"]) != target
        ][:limit]

        return {
            "target_package": {
                "name": package_name,
                "summary": summary,
                "keywords": keywords,
                "categories": category_terms,
            },
            "alternatives": alternatives,
            "search_query_used": search_query,
            "total_alternatives": len(alternatives),
            "analysis": {
                "search_method": "keyword_similarity" if search_terms else "name_based",
                "semantic_search_used": include_similar,
                "category_based": len(category_terms) > 0,
            },
            "timestamp": results["timestamp"],
        }

    except Exception as e:
        logger.error(f"Error finding alternatives for {package_name}: {e}")
        raise SearchError(f"Failed to find alternatives: {e}") from e
|
|
|
|
|
|
async def get_trending_packages(
    category: Optional[str] = None,
    time_period: str = "week",
    limit: int = 20,
) -> Dict[str, Any]:
    """
    Get trending packages based on recent download activity.

    The top packages by downloads for ``time_period`` are fetched (twice
    ``limit`` of them, to leave room for filtering). When ``category`` is
    given, each candidate's keywords and summary are fetched and the package
    is kept only if that text contains one of the category's keywords
    (plain substring match). Packages whose metadata cannot be fetched are
    skipped.

    Args:
        category: Optional category filter. Known categories ("web",
            "data-science", "database", "testing", "cli") use a keyword
            list; any other value is matched by its own lower-cased name.
        time_period: Time period for trending analysis ("day", "week", "month")
        limit: Maximum number of packages to return

    Returns:
        Dictionary with keys ``trending_packages``, ``time_period``,
        ``category``, ``total_found``, ``analysis`` and ``timestamp``

    Raises:
        SearchError: If the download statistics cannot be retrieved or any
            other unexpected error occurs
    """
    logger.info(f"Getting trending packages: category={category}, period={time_period}")

    try:
        # Use our top packages functionality as a base
        from .download_stats import get_top_packages_by_downloads

        top_packages_result = await get_top_packages_by_downloads(period=time_period, limit=limit * 2)

        # Filter by category if specified
        if category:
            from ..core.pypi_client import PyPIClient

            category_key = category.lower()
            category_keywords = {
                # Deliberately specific so that generic HTTP clients, which
                # merely mention "web", are not classified as web frameworks.
                "web": ["web framework", "micro web", "flask", "django", "fastapi", "wsgi", "asgi"],
                "data-science": ["data", "science", "pandas", "numpy", "ml"],
                "database": ["database", "sql", "orm"],
                "testing": ["test", "pytest", "mock"],
                "cli": ["cli", "command", "argparse", "click"],
            }
            # Categories without a keyword list are matched by their own
            # name; previously they hit an unbound variable, and the swallowed
            # error made every such request return an empty list.
            keywords = category_keywords.get(category_key, [category_key])

            enhanced_packages = []
            for pkg in top_packages_result["top_packages"]:
                try:
                    # Get package metadata for category classification
                    # NOTE(review): one shared client for the whole loop would
                    # likely be cheaper - confirm PyPIClient is safe to reuse.
                    async with PyPIClient() as client:
                        package_data = await client.get_package_info(pkg["package"])

                    info = package_data["info"]
                    # `or ''` keeps null metadata values from showing up as
                    # the literal text "None" in the text being matched.
                    text = f"{info.get('keywords') or ''} {info.get('summary') or ''}".lower()

                    if any(keyword in text for keyword in keywords):
                        enhanced_packages.append({
                            **pkg,
                            "category_match": True,
                            "summary": info.get("summary", ""),
                        })
                except Exception as e:
                    # Best effort: one bad package must not fail the request.
                    # `Exception` (not a bare except) lets task cancellation
                    # and KeyboardInterrupt propagate.
                    logger.debug(f"Skipping {pkg!r} during category filtering: {e}")
                    continue

                # Only the first `limit` matches are returned, so stop making
                # network calls once they have been collected.
                if 0 < limit <= len(enhanced_packages):
                    break

            trending_packages = enhanced_packages[:limit]
        else:
            trending_packages = top_packages_result["top_packages"][:limit]

        return {
            "trending_packages": trending_packages,
            "time_period": time_period,
            "category": category,
            "total_found": len(trending_packages),
            "analysis": {
                "source": "download_statistics",
                "category_filtered": category is not None,
                "methodology": "Based on download counts and popularity metrics",
            },
            "timestamp": top_packages_result["timestamp"],
        }

    except Exception as e:
        logger.error(f"Error getting trending packages: {e}")
        raise SearchError(f"Failed to get trending packages: {e}") from e