Compare commits
No commits in common. "43f36b60fbe35c02cf82319f339817e92c0b6f4b" and "9924df34ec1107a44893a33d5e8559517ddd1b15" have entirely different histories.
43f36b60fb
...
9924df34ec
28
poetry.lock
generated
@@ -691,21 +691,6 @@ rich = ">=13.9.4"
 [package.extras]
 websockets = ["websockets (>=15.0.1)"]
 
-[[package]]
-name = "feedparser"
-version = "6.0.11"
-description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
-optional = false
-python-versions = ">=3.6"
-groups = ["main"]
-files = [
-    {file = "feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45"},
-    {file = "feedparser-6.0.11.tar.gz", hash = "sha256:c9d0407b64c6f2a065d0ebb292c2b35c01050cc0dc33757461aaabdc4c4184d5"},
-]
-
-[package.dependencies]
-sgmllib3k = "*"
-
 [[package]]
 name = "filelock"
 version = "3.19.1"
@@ -2009,17 +1994,6 @@ files = [
     {file = "ruff-0.12.9.tar.gz", hash = "sha256:fbd94b2e3c623f659962934e52c2bea6fc6da11f667a427a368adaf3af2c866a"},
 ]
 
-[[package]]
-name = "sgmllib3k"
-version = "1.0.0"
-description = "Py3k port of sgmllib."
-optional = false
-python-versions = "*"
-groups = ["main"]
-files = [
-    {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
-]
-
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -2276,4 +2250,4 @@ watchdog = ["watchdog (>=2.3)"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.10"
-content-hash = "13bc4176d567d6738ca9ca5ebd67565f8526853434911137f4b51b39e275a546"
+content-hash = "9785e18d2d996f5e58e1b06c722f6de31c445a1a83528f39227d1c373b91f989"
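Net effect of the three hunks above: feedparser and its only dependency, sgmllib3k, leave the lock file, and the content-hash is recomputed. As a quick sanity check that a dependency is really gone, the lock file can be read as ordinary TOML; a minimal sketch (tomllib is standard library from Python 3.11, older interpreters can use the tomli package):

```python
import tomllib  # Python 3.11+; on older versions: import tomli as tomllib

with open("poetry.lock", "rb") as f:
    lock = tomllib.load(f)

# poetry.lock 2.x stores packages as an array of [[package]] tables.
names = {pkg["name"] for pkg in lock["package"]}
assert "feedparser" not in names
assert "sgmllib3k" not in names
```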
@@ -8,10 +8,6 @@ __version__ = "0.1.0"
 __author__ = "Hal"
 __email__ = "hal.long@outlook.com"
 
-try:
-    from pypi_query_mcp.server import mcp
-    __all__ = ["mcp", "__version__"]
-except ImportError:
-    # Server dependencies not available (fastmcp, etc.)
-    # Tools can still be imported individually
-    __all__ = ["__version__"]
+from pypi_query_mcp.server import mcp
+
+__all__ = ["mcp", "__version__"]
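This hunk (by its contents, the package's top-level `__init__.py`) drops the import guard, so importing the package now fails outright when the server dependencies are missing. For reference, the guarded pattern being removed is the usual way to keep a package importable when an optional extra is absent; a self-contained sketch of the same idea:

```python
# Sketch of the optional-dependency guard removed in this hunk.
__version__ = "0.1.0"

try:
    from pypi_query_mcp.server import mcp  # needs the server extras (fastmcp, etc.)
    __all__ = ["mcp", "__version__"]
except ImportError:
    # Server dependencies not installed; tools can still be imported individually.
    __all__ = ["__version__"]
```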
@@ -126,42 +126,20 @@ class PyPISearchClient:
 
         try:
             # Use PyPI's search API as the primary source
-            try:
-                pypi_results = await self._search_pypi_api(query, limit * 3)  # Get more for filtering
-                logger.info(f"Got {len(pypi_results)} raw results from PyPI API")
-            except Exception as e:
-                logger.error(f"PyPI API search failed: {e}")
-                pypi_results = []
+            pypi_results = await self._search_pypi_api(query, limit * 3)  # Get more for filtering
 
             # Enhance results with additional metadata
-            try:
-                enhanced_results = await self._enhance_search_results(pypi_results)
-                logger.info(f"Enhanced to {len(enhanced_results)} results")
-            except Exception as e:
-                logger.error(f"Enhancement failed: {e}")
-                enhanced_results = pypi_results
+            enhanced_results = await self._enhance_search_results(pypi_results)
 
             # Apply filters
-            try:
-                filtered_results = self._apply_filters(enhanced_results, filters)
-                logger.info(f"Filtered to {len(filtered_results)} results")
-            except Exception as e:
-                logger.error(f"Filtering failed: {e}")
-                filtered_results = enhanced_results
+            filtered_results = self._apply_filters(enhanced_results, filters)
 
             # Apply semantic search if requested
             if semantic_search:
-                try:
-                    filtered_results = self._apply_semantic_search(filtered_results, query)
-                except Exception as e:
-                    logger.error(f"Semantic search failed: {e}")
+                filtered_results = self._apply_semantic_search(filtered_results, query)
 
             # Sort results
-            try:
-                sorted_results = self._sort_results(filtered_results, sort)
-            except Exception as e:
-                logger.error(f"Sorting failed: {e}")
-                sorted_results = filtered_results
+            sorted_results = self._sort_results(filtered_results, sort)
 
             # Limit results
             final_results = sorted_results[:limit]
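The left-hand version wraps each pipeline stage in its own try/except and degrades to the previous stage's output; the right-hand version lets any stage failure propagate to the surrounding handler. If per-stage degradation were wanted without the repetition, it could be factored into a helper along these lines (a sketch; `run_stage` is a hypothetical name, not in either branch):

```python
import logging
from typing import Callable, TypeVar

logger = logging.getLogger(__name__)
T = TypeVar("T")

def run_stage(name: str, func: Callable[[], T], fallback: T) -> T:
    """Run one pipeline stage, logging and falling back to a default on failure."""
    try:
        return func()
    except Exception as e:
        logger.error(f"{name} failed: {e}")
        return fallback

# Usage, mirroring the removed code:
# filtered = run_stage("Filtering", lambda: self._apply_filters(enhanced, filters), enhanced)
```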
@@ -183,318 +161,72 @@ class PyPISearchClient:
             raise SearchError(f"Search failed: {e}") from e
 
     async def _search_pypi_api(self, query: str, limit: int) -> List[Dict[str, Any]]:
-        """Search using available PyPI methods - no native search API exists."""
-        logger.info(f"PyPI has no native search API, using curated search for: '{query}'")
-
-        # PyPI doesn't have a search API, so we'll use our curated approach
-        # combined with direct package lookups for exact matches
-        results = []
-
-        # First: try direct package lookup (exact match)
-        try:
-            direct_result = await self._try_direct_package_lookup(query)
-            if direct_result:
-                results.extend(direct_result)
-        except Exception as e:
-            logger.debug(f"Direct lookup failed: {e}")
-
-        # Second: search curated packages
-        try:
-            curated_results = await self._search_curated_packages(query, limit)
-            # Add curated results that aren't already in the list
-            existing_names = {r["name"].lower() for r in results}
-            for result in curated_results:
-                if result["name"].lower() not in existing_names:
-                    results.append(result)
-        except Exception as e:
-            logger.error(f"Curated search failed: {e}")
-
-        return results[:limit]
-
-    async def _try_direct_package_lookup(self, query: str) -> List[Dict[str, Any]]:
-        """Try to get package info directly using PyPI JSON API."""
-        candidates = [
-            query.strip(),
-            query.strip().lower(),
-            query.strip().replace(" ", "-"),
-            query.strip().replace(" ", "_"),
-            query.strip().replace("_", "-"),
-            query.strip().replace("-", "_"),
-        ]
-
-        results = []
-        for candidate in candidates:
-            try:
-                async with PyPIClient() as client:
-                    package_data = await client.get_package_info(candidate)
-
-                    results.append({
-                        "name": package_data["info"]["name"],
-                        "summary": package_data["info"]["summary"] or "",
-                        "version": package_data["info"]["version"],
-                        "source": "direct_api",
-                        "description": package_data["info"]["description"] or "",
-                        "author": package_data["info"]["author"] or "",
-                        "license": package_data["info"]["license"] or "",
-                        "home_page": package_data["info"]["home_page"] or "",
-                        "requires_python": package_data["info"]["requires_python"] or "",
-                        "classifiers": package_data["info"]["classifiers"] or [],
-                        "keywords": package_data["info"]["keywords"] or "",
-                    })
-                    break  # Found exact match, stop looking
-
-            except Exception:
-                continue  # Try next candidate
-
-        return results
-
-    async def _search_curated_packages(self, query: str, limit: int) -> List[Dict[str, Any]]:
-        """Search our curated package database."""
-        from ..data.popular_packages import ALL_POPULAR_PACKAGES
-
-        curated_matches = []
-        query_lower = query.lower()
-
-        logger.info(f"Searching {len(ALL_POPULAR_PACKAGES)} curated packages for '{query}'")
-
-        # First: exact name matches
-        for pkg in ALL_POPULAR_PACKAGES:
-            if query_lower == pkg.name.lower():
-                curated_matches.append({
-                    "name": pkg.name,
-                    "summary": pkg.description,
-                    "version": "latest",
-                    "source": "curated_exact",
-                    "category": pkg.category,
-                    "estimated_downloads": pkg.estimated_monthly_downloads,
-                    "github_stars": pkg.github_stars,
-                    "primary_use_case": pkg.primary_use_case,
-                })
-
-        # Second: name contains query (if not too many exact matches)
-        if len(curated_matches) < limit:
-            for pkg in ALL_POPULAR_PACKAGES:
-                if (query_lower in pkg.name.lower() and
-                        pkg.name not in [m["name"] for m in curated_matches]):
-                    curated_matches.append({
-                        "name": pkg.name,
-                        "summary": pkg.description,
-                        "version": "latest",
-                        "source": "curated_name",
-                        "category": pkg.category,
-                        "estimated_downloads": pkg.estimated_monthly_downloads,
-                        "github_stars": pkg.github_stars,
-                        "primary_use_case": pkg.primary_use_case,
-                    })
-
-        # Third: description or use case matches (if still need more results)
-        if len(curated_matches) < limit:
-            for pkg in ALL_POPULAR_PACKAGES:
-                if ((query_lower in pkg.description.lower() or
-                        query_lower in pkg.primary_use_case.lower()) and
-                        pkg.name not in [m["name"] for m in curated_matches]):
-                    curated_matches.append({
-                        "name": pkg.name,
-                        "summary": pkg.description,
-                        "version": "latest",
-                        "source": "curated_desc",
-                        "category": pkg.category,
-                        "estimated_downloads": pkg.estimated_monthly_downloads,
-                        "github_stars": pkg.github_stars,
-                        "primary_use_case": pkg.primary_use_case,
-                    })
-
-        # Sort by popularity (downloads)
-        curated_matches.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
-
-        logger.info(f"Found {len(curated_matches)} curated matches")
-        return curated_matches[:limit]
+        """Search using PyPI's official search API."""
+        url = "https://pypi.org/search/"
+        params = {
+            "q": query,
+            "page": 1,
+        }
+
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            try:
+                response = await client.get(url, params=params)
+                response.raise_for_status()
+
+                # Parse the HTML response (PyPI search returns HTML)
+                return await self._parse_search_html(response.text, limit)
+            except httpx.HTTPError as e:
+                logger.error(f"PyPI search API error: {e}")
+                # Fallback to alternative search method
+                return await self._fallback_search(query, limit)
 
     async def _fallback_search(self, query: str, limit: int) -> List[Dict[str, Any]]:
         """Fallback search using PyPI JSON API and our curated data."""
-        try:
-            from ..data.popular_packages import PACKAGES_BY_NAME, get_popular_packages, ALL_POPULAR_PACKAGES
-
-            # Search in our curated packages first
-            curated_matches = []
-            query_lower = query.lower()
-
-            logger.info(f"Searching in {len(ALL_POPULAR_PACKAGES)} curated packages for '{query}'")
-
-            # First: exact name matches
-            for package_info in ALL_POPULAR_PACKAGES:
-                if query_lower == package_info.name.lower():
-                    curated_matches.append({
-                        "name": package_info.name,
-                        "summary": package_info.description,
-                        "version": "latest",
-                        "source": "curated_exact",
-                        "category": package_info.category,
-                        "estimated_downloads": package_info.estimated_monthly_downloads,
-                        "github_stars": package_info.github_stars,
-                    })
-
-            # Second: name contains query
-            for package_info in ALL_POPULAR_PACKAGES:
-                if (query_lower in package_info.name.lower() and
-                        package_info.name not in [m["name"] for m in curated_matches]):
-                    curated_matches.append({
-                        "name": package_info.name,
-                        "summary": package_info.description,
-                        "version": "latest",
-                        "source": "curated_name",
-                        "category": package_info.category,
-                        "estimated_downloads": package_info.estimated_monthly_downloads,
-                        "github_stars": package_info.github_stars,
-                    })
-
-            # Third: description or use case matches
-            for package_info in ALL_POPULAR_PACKAGES:
-                if ((query_lower in package_info.description.lower() or
-                        query_lower in package_info.primary_use_case.lower()) and
-                        package_info.name not in [m["name"] for m in curated_matches]):
-                    curated_matches.append({
-                        "name": package_info.name,
-                        "summary": package_info.description,
-                        "version": "latest",
-                        "source": "curated_desc",
-                        "category": package_info.category,
-                        "estimated_downloads": package_info.estimated_monthly_downloads,
-                        "github_stars": package_info.github_stars,
-                    })
-
-            logger.info(f"Found {len(curated_matches)} curated matches")
-
-            # If we have some matches, return them (sorted by popularity)
-            if curated_matches:
-                curated_matches.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
-                return curated_matches[:limit]
-
-            # Last resort: try direct package lookup
-            logger.info("No curated matches, trying direct package lookup")
-            try:
-                async with PyPIClient() as client:
-                    package_data = await client.get_package_info(query)
-                    return [{
-                        "name": package_data["info"]["name"],
-                        "summary": package_data["info"]["summary"] or "",
-                        "version": package_data["info"]["version"],
-                        "source": "direct_fallback",
-                        "description": package_data["info"]["description"] or "",
-                        "author": package_data["info"]["author"] or "",
-                    }]
-            except Exception as e:
-                logger.info(f"Direct lookup failed: {e}")
-
-        except Exception as e:
-            logger.error(f"Fallback search failed: {e}")
+        from ..data.popular_packages import PACKAGES_BY_NAME, get_popular_packages
+
+        # Search in our curated packages first
+        curated_matches = []
+        query_lower = query.lower()
+
+        for package_info in get_popular_packages(limit=1000):
+            name_match = query_lower in package_info.name.lower()
+            desc_match = query_lower in package_info.description.lower()
+
+            if name_match or desc_match:
+                curated_matches.append({
+                    "name": package_info.name,
+                    "summary": package_info.description,
+                    "version": "unknown",
+                    "source": "curated",
+                    "category": package_info.category,
+                    "estimated_downloads": package_info.estimated_monthly_downloads,
+                })
+
+        # If we have some matches, return them
+        if curated_matches:
+            return curated_matches[:limit]
+
+        # Last resort: try simple package name search
+        try:
+            async with PyPIClient() as client:
+                # Try to get the package directly if it's an exact match
+                try:
+                    package_data = await client.get_package_info(query)
+                    return [{
+                        "name": package_data["info"]["name"],
+                        "summary": package_data["info"]["summary"] or "",
+                        "version": package_data["info"]["version"],
+                        "source": "direct",
+                    }]
+                except:
+                    pass
+        except Exception as e:
+            logger.warning(f"Fallback search failed: {e}")
 
         return []
 
-    async def _search_xmlrpc(self, query: str, limit: int) -> List[Dict[str, Any]]:
-        """Search using enhanced curated search with fuzzy matching."""
-        # Since PyPI XML-RPC search is deprecated, use our enhanced curated search
-        try:
-            from ..data.popular_packages import get_popular_packages, ALL_POPULAR_PACKAGES
-
-            query_lower = query.lower()
-            results = []
-
-            # First pass: exact name matches
-            for pkg in ALL_POPULAR_PACKAGES:
-                if query_lower == pkg.name.lower():
-                    results.append({
-                        "name": pkg.name,
-                        "summary": pkg.description,
-                        "version": "latest",
-                        "source": "curated_exact",
-                        "category": pkg.category,
-                        "estimated_downloads": pkg.estimated_monthly_downloads,
-                        "github_stars": pkg.github_stars,
-                    })
-
-            # Second pass: name contains query
-            for pkg in ALL_POPULAR_PACKAGES:
-                if query_lower in pkg.name.lower() and pkg.name not in [r["name"] for r in results]:
-                    results.append({
-                        "name": pkg.name,
-                        "summary": pkg.description,
-                        "version": "latest",
-                        "source": "curated_name",
-                        "category": pkg.category,
-                        "estimated_downloads": pkg.estimated_monthly_downloads,
-                        "github_stars": pkg.github_stars,
-                    })
-
-            # Third pass: description contains query
-            for pkg in ALL_POPULAR_PACKAGES:
-                if (query_lower in pkg.description.lower() or
-                        query_lower in pkg.primary_use_case.lower()) and pkg.name not in [r["name"] for r in results]:
-                    results.append({
-                        "name": pkg.name,
-                        "summary": pkg.description,
-                        "version": "latest",
-                        "source": "curated_desc",
-                        "category": pkg.category,
-                        "estimated_downloads": pkg.estimated_monthly_downloads,
-                        "github_stars": pkg.github_stars,
-                    })
-
-            # Sort by popularity (downloads)
-            results.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
-
-            return results[:limit]
-
-        except Exception as e:
-            logger.debug(f"Enhanced curated search error: {e}")
-            return []
-
-    async def _search_simple_api(self, query: str, limit: int) -> List[Dict[str, Any]]:
-        """Search using direct PyPI JSON API for specific packages."""
-        try:
-            # Try direct package lookup if query looks like a package name
-            query_clean = query.strip().lower().replace(" ", "-")
-
-            # Try variations of the query as package names
-            candidates = [
-                query_clean,
-                query_clean.replace("-", "_"),
-                query_clean.replace("_", "-"),
-                query.strip(),  # Original query
-            ]
-
-            results = []
-
-            for candidate in candidates:
-                if len(results) >= limit:
-                    break
-
-                try:
-                    async with PyPIClient() as client:
-                        package_data = await client.get_package_info(candidate)
-
-                        results.append({
-                            "name": package_data["info"]["name"],
-                            "summary": package_data["info"]["summary"] or "",
-                            "version": package_data["info"]["version"],
-                            "source": "direct_api",
-                            "description": package_data["info"]["description"] or "",
-                            "author": package_data["info"]["author"] or "",
-                            "license": package_data["info"]["license"] or "",
-                        })
-
-                except Exception:
-                    # Package doesn't exist, continue to next candidate
-                    continue
-
-            return results
-
-        except Exception as e:
-            logger.debug(f"Simple API search error: {e}")
-            return []
-
     async def _parse_search_html(self, html: str, limit: int) -> List[Dict[str, Any]]:
         """Parse PyPI search results from HTML (simplified parser)."""
         # This is a simplified parser - in production, you'd use BeautifulSoup
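The two docstrings cannot both be right: pypi.org exposes no JSON search endpoint, and https://pypi.org/search/ returns HTML meant for browsers (the right-hand code itself says "PyPI search returns HTML"), which is why the left-hand branch leans on curated data plus exact-name lookups instead. The JSON lookup both branches ultimately rely on is the documented per-package endpoint; a standalone sketch using plain httpx rather than the project's PyPIClient wrapper:

```python
import asyncio
import httpx

async def lookup(name: str) -> dict | None:
    """Fetch package metadata from PyPI's JSON API; None if the name is unknown."""
    async with httpx.AsyncClient(timeout=10.0) as client:
        resp = await client.get(f"https://pypi.org/pypi/{name}/json")
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        return resp.json()

info = asyncio.run(lookup("httpx"))
print(info["info"]["summary"] if info else "not found")
```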
@@ -505,19 +237,9 @@ class PyPISearchClient:
         """Enhance search results with additional metadata from PyPI API."""
         enhanced = []
 
-        # Skip enhancement if results already have good metadata from curated source
-        if results and results[0].get("source", "").startswith("curated"):
-            logger.info("Using curated results without enhancement")
-            return results
-
-        # For direct API results, they're already enhanced
-        if results and results[0].get("source") == "direct_api":
-            logger.info("Using direct API results without additional enhancement")
-            return results
-
-        # Process in small batches to avoid overwhelming the API
-        batch_size = 3
-        for i in range(0, min(len(results), 10), batch_size):  # Limit to first 10 results
+        # Process in batches to avoid overwhelming the API
+        batch_size = 5
+        for i in range(0, len(results), batch_size):
             batch = results[i:i + batch_size]
             batch_tasks = [
                 self._enhance_single_result(result)
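The right-hand side enhances every result in batches of 5, while the left-hand side skipped enhancement for curated and direct-API results and capped work at the first 10. The batching pattern in isolation looks like this (a sketch; `enhance` stands in for `_enhance_single_result`):

```python
import asyncio
from typing import Awaitable, Callable

async def enhance_all(
    results: list[dict],
    enhance: Callable[[dict], Awaitable[dict]],
    batch_size: int = 5,
) -> list[dict]:
    """Enhance results in fixed-size batches so the API is not hit all at once."""
    enhanced: list[dict] = []
    for i in range(0, len(results), batch_size):
        batch = results[i:i + batch_size]
        # return_exceptions=True keeps one bad result from failing the whole batch.
        done = await asyncio.gather(*(enhance(r) for r in batch), return_exceptions=True)
        enhanced.extend(r for r in done if not isinstance(r, Exception))
    return enhanced
```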
File diff suppressed because it is too large
@@ -21,6 +21,12 @@ from .package_query import (
     query_package_info,
     query_package_versions,
 )
+from .metadata import (
+    manage_package_keywords,
+    manage_package_urls,
+    set_package_visibility,
+    update_package_metadata,
+)
 from .publishing import (
     check_pypi_credentials,
     delete_pypi_release,
@@ -29,11 +35,11 @@ from .publishing import (
     manage_pypi_maintainers,
     upload_package_to_pypi,
 )
-from .metadata import (
-    manage_package_keywords,
-    manage_package_urls,
-    set_package_visibility,
-    update_package_metadata,
+from .search import (
+    find_alternatives,
+    get_trending_packages,
+    search_by_category,
+    search_packages,
 )
 from .analytics import (
     analyze_pypi_competition,
@@ -41,48 +47,8 @@ from .analytics import (
     get_pypi_package_rankings,
     get_pypi_security_alerts,
 )
-from .discovery import (
-    get_pypi_package_recommendations,
-    get_pypi_trending_today,
-    monitor_pypi_new_releases,
-    search_pypi_by_maintainer,
-)
-from .workflow import (
-    check_pypi_upload_requirements,
-    get_pypi_build_logs,
-    preview_pypi_package_page,
-    validate_pypi_package_name,
-)
-from .community import (
-    get_pypi_package_reviews,
-    manage_pypi_package_discussions,
-    get_pypi_maintainer_contacts,
-)
-from .search import (
-    find_alternatives,
-    get_trending_packages,
-    search_by_category,
-    search_packages,
-)
-from .security_tools import (
-    bulk_scan_package_security,
-    scan_pypi_package_security,
-)
-from .license_tools import (
-    analyze_pypi_package_license,
-    check_bulk_license_compliance,
-)
-from .health_tools import (
-    assess_package_health_score,
-    compare_packages_health_scores,
-)
-from .requirements_tools import (
-    analyze_requirements_file_tool,
-    compare_multiple_requirements_files,
-)
 
 __all__ = [
-    # Core package tools
     "query_package_info",
     "query_package_versions",
     "query_package_dependencies",
@@ -94,52 +60,22 @@ __all__ = [
     "get_package_download_stats",
     "get_package_download_trends",
     "get_top_packages_by_downloads",
-    # Search tools
     "search_packages",
     "search_by_category",
     "find_alternatives",
     "get_trending_packages",
-    # Publishing tools
     "upload_package_to_pypi",
     "check_pypi_credentials",
     "get_pypi_upload_history",
     "delete_pypi_release",
     "manage_pypi_maintainers",
     "get_pypi_account_info",
-    # Metadata tools
     "update_package_metadata",
     "manage_package_urls",
    "set_package_visibility",
     "manage_package_keywords",
-    # Analytics tools
     "get_pypi_package_analytics",
     "get_pypi_security_alerts",
     "get_pypi_package_rankings",
     "analyze_pypi_competition",
-    # Discovery tools
-    "monitor_pypi_new_releases",
-    "get_pypi_trending_today",
-    "search_pypi_by_maintainer",
-    "get_pypi_package_recommendations",
-    # Workflow tools
-    "validate_pypi_package_name",
-    "preview_pypi_package_page",
-    "check_pypi_upload_requirements",
-    "get_pypi_build_logs",
-    # Community tools
-    "get_pypi_package_reviews",
-    "manage_pypi_package_discussions",
-    "get_pypi_maintainer_contacts",
-    # Security tools
-    "scan_pypi_package_security",
-    "bulk_scan_package_security",
-    # License tools
-    "analyze_pypi_package_license",
-    "check_bulk_license_compliance",
-    # Health tools
-    "assess_package_health_score",
-    "compare_packages_health_scores",
-    # Requirements tools
-    "analyze_requirements_file_tool",
-    "compare_multiple_requirements_files",
 ]
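With the discovery, workflow, community, security, license, health, and requirements imports gone, `__all__` shrinks to match, as above. A cheap regression test that every surviving `__all__` entry actually resolves (a sketch, assuming the package path is `pypi_query_mcp.tools`):

```python
import importlib

mod = importlib.import_module("pypi_query_mcp.tools")
# Every exported name should be bound on the module after import.
missing = [name for name in mod.__all__ if not hasattr(mod, name)]
assert not missing, f"__all__ names without a binding: {missing}"
```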
File diff suppressed because it is too large
@@ -39,7 +39,7 @@ async def check_python_compatibility(
 
     try:
         async with PyPIClient() as client:
-            package_data = await client.get_package_info(package_name, use_cache=use_cache)
+            package_data = await client.get_package_info(package_name, use_cache)
 
             info = package_data.get("info", {})
             requires_python = info.get("requires_python")
@@ -103,7 +103,7 @@ async def get_compatible_python_versions(
 
     try:
         async with PyPIClient() as client:
-            package_data = await client.get_package_info(package_name, use_cache=use_cache)
+            package_data = await client.get_package_info(package_name, use_cache)
 
             info = package_data.get("info", {})
             requires_python = info.get("requires_python")
@@ -177,7 +177,7 @@ async def suggest_python_version_for_packages(
     async with PyPIClient() as client:
         for package_name in package_names:
             try:
-                package_data = await client.get_package_info(package_name, use_cache=use_cache)
+                package_data = await client.get_package_info(package_name, use_cache)
                 info = package_data.get("info", {})
 
                 requires_python = info.get("requires_python")
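All three call sites switch from `use_cache=use_cache` to passing the flag positionally. That is only equivalent if `use_cache` is the second positional parameter of `get_package_info`; a sketch of the assumed signature (the real definition lives in the core PyPIClient, which this compare does not show):

```python
class PyPIClient:
    # Assumed signature; the actual one is in pypi_query_mcp/core/pypi_client.py.
    async def get_package_info(self, package_name: str, use_cache: bool = True) -> dict:
        ...

# Under that signature these calls are interchangeable:
#   await client.get_package_info(name, use_cache)
#   await client.get_package_info(name, use_cache=use_cache)
```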
File diff suppressed because it is too large
@@ -1,974 +0,0 @@
-"""Package health scoring and quality assessment tools for PyPI packages."""
-
-import asyncio
-import logging
-import re
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
-from urllib.parse import urlparse
-
-import httpx
-
-from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
-from ..core.pypi_client import PyPIClient
-
-logger = logging.getLogger(__name__)
-
-
-class PackageHealthScorer:
-    """Comprehensive health and quality scorer for PyPI packages."""
-
-    def __init__(self):
-        self.timeout = 30.0
-
-        # Health scoring weights (total = 100)
-        self.weights = {
-            "maintenance": 25,    # Maintenance indicators
-            "popularity": 20,     # Download stats, stars, usage
-            "documentation": 15,  # Documentation quality
-            "testing": 15,        # Testing and CI indicators
-            "security": 10,       # Security practices
-            "compatibility": 10,  # Python version support
-            "metadata": 5,        # Metadata completeness
-        }
-
-        # Quality metrics thresholds
-        self.thresholds = {
-            "downloads_monthly_excellent": 1000000,
-            "downloads_monthly_good": 100000,
-            "downloads_monthly_fair": 10000,
-            "version_age_days_fresh": 90,
-            "version_age_days_good": 365,
-            "version_age_days_stale": 730,
-            "python_versions_excellent": 4,
-            "python_versions_good": 3,
-            "python_versions_fair": 2,
-        }
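Given this weight table (the weights sum to 100 and each category score is clamped to 0-100), the overall score computed later in `assess_package_health` reduces to a weighted average; a minimal sketch:

```python
weights = {"maintenance": 25, "popularity": 20, "documentation": 15, "testing": 15,
           "security": 10, "compatibility": 10, "metadata": 5}

def overall(scores: dict[str, float]) -> float:
    """Weighted average: each category contributes score * weight / 100."""
    return sum(scores[c] * weights[c] / 100 for c in weights)

print(overall({c: 80 for c in weights}))  # uniform 80s -> 80.0
```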
-    async def assess_package_health(
-        self,
-        package_name: str,
-        version: Optional[str] = None,
-        include_github_metrics: bool = True
-    ) -> Dict[str, Any]:
-        """
-        Assess comprehensive health and quality of a PyPI package.
-
-        Args:
-            package_name: Name of the package to assess
-            version: Specific version to assess (optional)
-            include_github_metrics: Whether to fetch GitHub repository metrics
-
-        Returns:
-            Dictionary containing health assessment results
-        """
-        logger.info(f"Starting health assessment for package: {package_name}")
-
-        try:
-            async with PyPIClient() as client:
-                package_data = await client.get_package_info(package_name, version)
-
-                package_version = version or package_data["info"]["version"]
-
-                # Run parallel health assessments
-                assessment_tasks = [
-                    self._assess_maintenance_health(package_data),
-                    self._assess_popularity_metrics(package_data),
-                    self._assess_documentation_quality(package_data),
-                    self._assess_testing_indicators(package_data),
-                    self._assess_security_practices(package_data),
-                    self._assess_compatibility_support(package_data),
-                    self._assess_metadata_completeness(package_data),
-                ]
-
-                if include_github_metrics:
-                    github_url = self._extract_github_url(package_data)
-                    if github_url:
-                        assessment_tasks.append(self._fetch_github_metrics(github_url))
-                    else:
-                        assessment_tasks.append(asyncio.create_task(self._empty_github_metrics()))
-                else:
-                    assessment_tasks.append(asyncio.create_task(self._empty_github_metrics()))
-
-                results = await asyncio.gather(*assessment_tasks, return_exceptions=True)
-
-                # Unpack results
-                (maintenance, popularity, documentation, testing,
-                 security, compatibility, metadata, github_metrics) = results
-
-                # Handle exceptions
-                if isinstance(github_metrics, Exception):
-                    github_metrics = self._empty_github_metrics()
-
-                # Calculate overall health score
-                health_scores = {
-                    "maintenance": maintenance.get("score", 0) if not isinstance(maintenance, Exception) else 0,
-                    "popularity": popularity.get("score", 0) if not isinstance(popularity, Exception) else 0,
-                    "documentation": documentation.get("score", 0) if not isinstance(documentation, Exception) else 0,
-                    "testing": testing.get("score", 0) if not isinstance(testing, Exception) else 0,
-                    "security": security.get("score", 0) if not isinstance(security, Exception) else 0,
-                    "compatibility": compatibility.get("score", 0) if not isinstance(compatibility, Exception) else 0,
-                    "metadata": metadata.get("score", 0) if not isinstance(metadata, Exception) else 0,
-                }
-
-                overall_score = sum(
-                    health_scores[category] * (self.weights[category] / 100)
-                    for category in health_scores
-                )
-
-                health_level = self._calculate_health_level(overall_score)
-
-                # Generate recommendations
-                recommendations = self._generate_health_recommendations(
-                    health_scores, maintenance, popularity, documentation,
-                    testing, security, compatibility, metadata, github_metrics
-                )
-
-                return {
-                    "package": package_name,
-                    "version": package_version,
-                    "assessment_timestamp": datetime.now(timezone.utc).isoformat(),
-                    "overall_health": {
-                        "score": round(overall_score, 2),
-                        "level": health_level,
-                        "max_score": 100,
-                    },
-                    "category_scores": health_scores,
-                    "detailed_assessment": {
-                        "maintenance": maintenance if not isinstance(maintenance, Exception) else {"score": 0, "indicators": [], "issues": [str(maintenance)]},
-                        "popularity": popularity if not isinstance(popularity, Exception) else {"score": 0, "metrics": {}, "issues": [str(popularity)]},
-                        "documentation": documentation if not isinstance(documentation, Exception) else {"score": 0, "indicators": [], "issues": [str(documentation)]},
-                        "testing": testing if not isinstance(testing, Exception) else {"score": 0, "indicators": [], "issues": [str(testing)]},
-                        "security": security if not isinstance(security, Exception) else {"score": 0, "practices": [], "issues": [str(security)]},
-                        "compatibility": compatibility if not isinstance(compatibility, Exception) else {"score": 0, "support": [], "issues": [str(compatibility)]},
-                        "metadata": metadata if not isinstance(metadata, Exception) else {"score": 0, "completeness": {}, "issues": [str(metadata)]},
-                        "github_metrics": github_metrics,
-                    },
-                    "recommendations": recommendations,
-                    "health_summary": {
-                        "strengths": self._identify_strengths(health_scores),
-                        "weaknesses": self._identify_weaknesses(health_scores),
-                        "improvement_priority": self._prioritize_improvements(health_scores),
-                    }
-                }
-
-        except Exception as e:
-            logger.error(f"Health assessment failed for {package_name}: {e}")
-            raise SearchError(f"Health assessment failed: {e}") from e
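The method fans out the category assessments with `asyncio.gather(..., return_exceptions=True)` and then tests `isinstance(result, Exception)` before using each one, so a single failed assessor zeroes its category instead of aborting the whole report. The pattern in isolation (a runnable sketch):

```python
import asyncio

async def ok() -> int:
    return 42

async def boom() -> int:
    raise RuntimeError("assessment failed")

async def main() -> None:
    # Exceptions come back as values instead of propagating.
    results = await asyncio.gather(ok(), boom(), return_exceptions=True)
    scores = [r if not isinstance(r, Exception) else 0 for r in results]
    print(scores)  # [42, 0]

asyncio.run(main())
```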
-    async def _assess_maintenance_health(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess package maintenance health indicators."""
-        info = package_data.get("info", {})
-        releases = package_data.get("releases", {})
-
-        score = 0
-        indicators = []
-        issues = []
-
-        # Check release frequency
-        if releases:
-            release_dates = []
-            for version_releases in releases.values():
-                for release in version_releases:
-                    upload_time = release.get("upload_time_iso_8601")
-                    if upload_time:
-                        try:
-                            release_dates.append(datetime.fromisoformat(upload_time.replace('Z', '+00:00')))
-                        except:
-                            pass
-
-            if release_dates:
-                release_dates.sort(reverse=True)
-                latest_release = release_dates[0]
-                days_since_release = (datetime.now(timezone.utc) - latest_release).days
-
-                if days_since_release <= self.thresholds["version_age_days_fresh"]:
-                    score += 25
-                    indicators.append(f"Recent release ({days_since_release} days ago)")
-                elif days_since_release <= self.thresholds["version_age_days_good"]:
-                    score += 20
-                    indicators.append(f"Moderately recent release ({days_since_release} days ago)")
-                elif days_since_release <= self.thresholds["version_age_days_stale"]:
-                    score += 10
-                    indicators.append(f"Older release ({days_since_release} days ago)")
-                else:
-                    issues.append(f"Very old release ({days_since_release} days ago)")
-
-                # Check release consistency (last 5 releases)
-                if len(release_dates) >= 5:
-                    recent_releases = release_dates[:5]
-                    intervals = []
-                    for i in range(len(recent_releases) - 1):
-                        interval = (recent_releases[i] - recent_releases[i + 1]).days
-                        intervals.append(interval)
-
-                    avg_interval = sum(intervals) / len(intervals)
-                    if avg_interval <= 180:  # Releases every 6 months or less
-                        score += 15
-                        indicators.append(f"Regular releases (avg {avg_interval:.0f} days)")
-                    elif avg_interval <= 365:
-                        score += 10
-                        indicators.append(f"Periodic releases (avg {avg_interval:.0f} days)")
-                    else:
-                        issues.append(f"Infrequent releases (avg {avg_interval:.0f} days)")
-        else:
-            issues.append("No release history available")
-
-        # Check for development indicators
-        if "dev" in info.get("version", "").lower() or "alpha" in info.get("version", "").lower():
-            issues.append("Development/alpha version")
-        elif "beta" in info.get("version", "").lower():
-            score += 5
-            indicators.append("Beta version (active development)")
-        else:
-            score += 10
-            indicators.append("Stable version")
-
-        # Check for author/maintainer info
-        if info.get("author") or info.get("maintainer"):
-            score += 10
-            indicators.append("Active maintainer information")
-        else:
-            issues.append("No maintainer information")
-
-        return {
-            "score": min(score, 100),
-            "indicators": indicators,
-            "issues": issues,
-            "metrics": {
-                "days_since_last_release": days_since_release if 'days_since_release' in locals() else None,
-                "total_releases": len(releases),
-            }
-        }
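The release-age check parses `upload_time_iso_8601` by rewriting the trailing `Z` before calling `datetime.fromisoformat`, which only accepts the `Z` suffix natively from Python 3.11 on. The computation in isolation (a sketch with a sample timestamp):

```python
from datetime import datetime, timezone

upload_time = "2024-05-01T12:30:00Z"  # sample value in PyPI's ISO-8601 format
# Rewrite "Z" to an explicit offset so pre-3.11 interpreters can parse it too.
released = datetime.fromisoformat(upload_time.replace("Z", "+00:00"))
age_days = (datetime.now(timezone.utc) - released).days
print(age_days)
```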
-    async def _assess_popularity_metrics(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess package popularity and usage metrics."""
-        info = package_data.get("info", {})
-
-        score = 0
-        metrics = {}
-
-        # Estimate download popularity (since we don't have direct access)
-        # Use proxy indicators: project URLs, description length, classifiers
-
-        # Check for GitHub stars indicator
-        project_urls = info.get("project_urls", {}) or {}
-        github_url = None
-        for key, url in project_urls.items():
-            if "github.com" in (url or "").lower():
-                github_url = url
-                break
-
-        if not github_url:
-            home_page = info.get("home_page", "")
-            if "github.com" in home_page:
-                github_url = home_page
-
-        if github_url:
-            score += 15
-            metrics["has_github_repo"] = True
-        else:
-            metrics["has_github_repo"] = False
-
-        # Check description quality as popularity indicator
-        description = info.get("description", "") or ""
-        summary = info.get("summary", "") or ""
-
-        if len(description) > 1000:
-            score += 20
-            metrics["description_quality"] = "excellent"
-        elif len(description) > 500:
-            score += 15
-            metrics["description_quality"] = "good"
-        elif len(description) > 100:
-            score += 10
-            metrics["description_quality"] = "fair"
-        else:
-            metrics["description_quality"] = "poor"
-
-        # Check for comprehensive metadata (popularity indicator)
-        if info.get("keywords"):
-            score += 10
-        if len(info.get("classifiers", [])) > 5:
-            score += 15
-        if info.get("project_urls") and len(info.get("project_urls", {})) > 2:
-            score += 10
-
-        # Check for documentation links
-        docs_indicators = ["documentation", "docs", "readthedocs", "github.io"]
-        has_docs = any(
-            any(indicator in (url or "").lower() for indicator in docs_indicators)
-            for url in project_urls.values()
-        )
-        if has_docs:
-            score += 15
-            metrics["has_documentation"] = True
-        else:
-            metrics["has_documentation"] = False
-
-        # Check for community indicators
-        community_urls = ["issues", "bug", "tracker", "discussion", "forum"]
-        has_community = any(
-            any(indicator in key.lower() for indicator in community_urls)
-            for key in project_urls.keys()
-        )
-        if has_community:
-            score += 15
-            metrics["has_community_links"] = True
-        else:
-            metrics["has_community_links"] = False
-
-        return {
-            "score": min(score, 100),
-            "metrics": metrics,
-        }
-
-    async def _assess_documentation_quality(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess documentation quality indicators."""
-        info = package_data.get("info", {})
-
-        score = 0
-        indicators = []
-        issues = []
-
-        # Check description completeness
-        description = info.get("description", "") or ""
-        summary = info.get("summary", "") or ""
-
-        if len(description) > 2000:
-            score += 30
-            indicators.append("Comprehensive description")
-        elif len(description) > 1000:
-            score += 25
-            indicators.append("Good description length")
-        elif len(description) > 500:
-            score += 15
-            indicators.append("Adequate description")
-        elif len(description) > 100:
-            score += 10
-            indicators.append("Basic description")
-        else:
-            issues.append("Very short or missing description")
-
-        # Check for README indicators in description
-        readme_indicators = ["## ", "### ", "```", "# Installation", "# Usage", "# Examples"]
-        if any(indicator in description for indicator in readme_indicators):
-            score += 20
-            indicators.append("Structured documentation (README-style)")
-
-        # Check for documentation URLs
-        project_urls = info.get("project_urls", {}) or {}
-        docs_urls = []
-        for key, url in project_urls.items():
-            if any(term in key.lower() for term in ["doc", "guide", "manual", "wiki"]):
-                docs_urls.append(url)
-
-        if docs_urls:
-            score += 25
-            indicators.append(f"Documentation links ({len(docs_urls)} found)")
-        else:
-            issues.append("No dedicated documentation links")
-
-        # Check for example code in description
-        if "```" in description or "    " in description:  # Code blocks
-            score += 15
-            indicators.append("Contains code examples")
-
-        # Check for installation instructions
-        install_keywords = ["install", "pip install", "setup.py", "requirements"]
-        if any(keyword in description.lower() for keyword in install_keywords):
-            score += 10
-            indicators.append("Installation instructions provided")
-        else:
-            issues.append("No clear installation instructions")
-
-        return {
-            "score": min(score, 100),
-            "indicators": indicators,
-            "issues": issues,
-        }
-
-    async def _assess_testing_indicators(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess testing and CI/CD indicators."""
-        info = package_data.get("info", {})
-
-        score = 0
-        indicators = []
-        issues = []
-
-        # Check for testing-related classifiers
-        classifiers = info.get("classifiers", [])
-        testing_classifiers = [c for c in classifiers if "testing" in c.lower()]
-        if testing_classifiers:
-            score += 15
-            indicators.append("Testing framework classifiers")
-
-        # Check for CI/CD indicators in URLs
-        project_urls = info.get("project_urls", {}) or {}
-        ci_indicators = ["travis", "circleci", "appveyor", "azure", "github", "actions", "ci", "build"]
-        ci_urls = []
-        for key, url in project_urls.items():
-            if any(indicator in key.lower() or indicator in (url or "").lower() for indicator in ci_indicators):
-                ci_urls.append(key)
-
-        if ci_urls:
-            score += 25
-            indicators.append(f"CI/CD indicators ({len(ci_urls)} found)")
-
-        # Check description for testing mentions
-        description = (info.get("description", "") or "").lower()
-        testing_keywords = ["test", "pytest", "unittest", "nose", "coverage", "tox", "ci/cd", "continuous integration"]
-        testing_mentions = [kw for kw in testing_keywords if kw in description]
-
-        if testing_mentions:
-            score += 20
-            indicators.append(f"Testing framework mentions ({len(testing_mentions)} found)")
-        else:
-            issues.append("No testing framework mentions")
-
-        # Check for test dependencies (common patterns)
-        requires_dist = info.get("requires_dist", []) or []
-        test_deps = []
-        for req in requires_dist:
-            req_lower = req.lower()
-            if any(test_pkg in req_lower for test_pkg in ["pytest", "unittest", "nose", "coverage", "tox", "test"]):
-                test_deps.append(req.split()[0])
-
-        if test_deps:
-            score += 20
-            indicators.append(f"Test dependencies ({len(test_deps)} found)")
-        else:
-            issues.append("No test dependencies found")
-
-        # Check for badges (often indicate CI/testing)
-        badge_indicators = ["[![", "https://img.shields.io", "badge", "build status", "coverage"]
-        if any(indicator in description for indicator in badge_indicators):
-            score += 20
-            indicators.append("Status badges (likely CI integration)")
-
-        return {
-            "score": min(score, 100),
-            "indicators": indicators,
-            "issues": issues,
-        }
-
-    async def _assess_security_practices(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess security practices and indicators."""
-        info = package_data.get("info", {})
-
-        score = 0
-        practices = []
-        issues = []
-
-        # Check for security-related URLs
-        project_urls = info.get("project_urls", {}) or {}
-        security_urls = []
-        for key, url in project_urls.items():
-            if any(term in key.lower() for term in ["security", "vulnerability", "report", "bug"]):
-                security_urls.append(key)
-
-        if security_urls:
-            score += 25
-            practices.append(f"Security reporting channels ({len(security_urls)} found)")
-        else:
-            issues.append("No security reporting channels")
-
-        # Check for HTTPS URLs
-        https_urls = [url for url in project_urls.values() if (url or "").startswith("https://")]
-        if len(https_urls) == len([url for url in project_urls.values() if url]):
-            score += 15
-            practices.append("All URLs use HTTPS")
-        elif https_urls:
-            score += 10
-            practices.append("Some URLs use HTTPS")
-        else:
-            issues.append("No HTTPS URLs found")
-
-        # Check for security mentions in description
-        description = (info.get("description", "") or "").lower()
-        security_keywords = ["security", "secure", "vulnerability", "encryption", "authentication", "authorization"]
-        security_mentions = [kw for kw in security_keywords if kw in description]
-
-        if security_mentions:
-            score += 20
-            practices.append(f"Security awareness ({len(security_mentions)} mentions)")
-
-        # Check for license (security practice)
-        if info.get("license") or any("license" in c.lower() for c in info.get("classifiers", [])):
-            score += 15
-            practices.append("Clear license information")
-        else:
-            issues.append("No clear license information")
-
-        # Check for author/maintainer email (security contact)
-        if info.get("author_email") or info.get("maintainer_email"):
-            score += 10
-            practices.append("Maintainer contact information")
-        else:
-            issues.append("No maintainer contact information")
-
-        # Check for requirements specification (dependency security)
-        requires_dist = info.get("requires_dist", [])
-        if requires_dist:
-            # Check for version pinning (security practice)
-            pinned_deps = [req for req in requires_dist if any(op in req for op in ["==", ">=", "~="])]
-            if pinned_deps:
-                score += 15
-                practices.append(f"Version-pinned dependencies ({len(pinned_deps)}/{len(requires_dist)})")
-            else:
-                issues.append("No version-pinned dependencies")
-
-        return {
-            "score": min(score, 100),
-            "practices": practices,
-            "issues": issues,
-        }
-
-    async def _assess_compatibility_support(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess Python version and platform compatibility."""
-        info = package_data.get("info", {})
-
-        score = 0
-        support = []
-        issues = []
-
-        # Check Python version support from classifiers
-        classifiers = info.get("classifiers", [])
-        python_versions = []
-        for classifier in classifiers:
-            if "Programming Language :: Python ::" in classifier:
-                version_part = classifier.split("::")[-1].strip()
-                if re.match(r'^\d+\.\d+$', version_part):  # Like "3.8", "3.9"
-                    python_versions.append(version_part)
-
-        if len(python_versions) >= self.thresholds["python_versions_excellent"]:
-            score += 30
-            support.append(f"Excellent Python version support ({len(python_versions)} versions)")
-        elif len(python_versions) >= self.thresholds["python_versions_good"]:
-            score += 25
-            support.append(f"Good Python version support ({len(python_versions)} versions)")
-        elif len(python_versions) >= self.thresholds["python_versions_fair"]:
-            score += 15
-            support.append(f"Fair Python version support ({len(python_versions)} versions)")
-        elif python_versions:
-            score += 10
-            support.append(f"Limited Python version support ({len(python_versions)} versions)")
-        else:
-            issues.append("No explicit Python version support")
-
-        # Check requires_python specification
-        requires_python = info.get("requires_python")
-        if requires_python:
-            score += 20
-            support.append(f"Python requirement specified: {requires_python}")
-        else:
-            issues.append("No Python version requirement specified")
-
-        # Check platform support
-        platform_classifiers = [c for c in classifiers if "Operating System" in c]
-        if platform_classifiers:
-            if any("OS Independent" in c for c in platform_classifiers):
-                score += 20
-                support.append("Cross-platform support (OS Independent)")
-            else:
-                score += 15
-                support.append(f"Platform support ({len(platform_classifiers)} platforms)")
-        else:
-            issues.append("No platform support information")
-
-        # Check for wheel distribution (compatibility indicator)
-        urls = info.get("urls", []) or []
-        has_wheel = any(url.get("packagetype") == "bdist_wheel" for url in urls)
-        if has_wheel:
-            score += 15
-            support.append("Wheel distribution available")
-        else:
-            issues.append("No wheel distribution")
-
-        # Check development status
-        status_classifiers = [c for c in classifiers if "Development Status" in c]
-        if status_classifiers:
-            status = status_classifiers[0]
-            if "5 - Production/Stable" in status:
-                score += 15
-                support.append("Production/Stable status")
-            elif "4 - Beta" in status:
-                score += 10
-                support.append("Beta status")
-            elif "3 - Alpha" in status:
-                score += 5
-                support.append("Alpha status")
-            else:
-                issues.append(f"Early development status: {status}")
-
-        return {
-            "score": min(score, 100),
-            "support": support,
-            "issues": issues,
-            "python_versions": python_versions,
-        }
-
-    async def _assess_metadata_completeness(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Assess metadata completeness and quality."""
-        info = package_data.get("info", {})
-
-        score = 0
-        completeness = {}
-
-        # Essential fields
-        essential_fields = ["name", "version", "summary", "description", "author", "license"]
-        present_essential = [field for field in essential_fields if info.get(field)]
-        score += (len(present_essential) / len(essential_fields)) * 40
-        completeness["essential_fields"] = f"{len(present_essential)}/{len(essential_fields)}"
-
-        # Additional metadata fields
-        additional_fields = ["keywords", "home_page", "author_email", "classifiers", "project_urls"]
-        present_additional = [field for field in additional_fields if info.get(field)]
-        score += (len(present_additional) / len(additional_fields)) * 30
-        completeness["additional_fields"] = f"{len(present_additional)}/{len(additional_fields)}"
-
-        # Classifier completeness
-        classifiers = info.get("classifiers", [])
-        classifier_categories = set()
-        for classifier in classifiers:
-            category = classifier.split("::")[0].strip()
-            classifier_categories.add(category)
-
-        expected_categories = ["Development Status", "Intended Audience", "License", "Programming Language", "Topic"]
-        present_categories = [cat for cat in expected_categories if cat in classifier_categories]
-        score += (len(present_categories) / len(expected_categories)) * 20
-        completeness["classifier_categories"] = f"{len(present_categories)}/{len(expected_categories)}"
-
-        # URLs completeness
-        project_urls = info.get("project_urls", {}) or {}
-        expected_url_types = ["homepage", "repository", "documentation", "bug tracker"]
-        present_url_types = []
-        for expected in expected_url_types:
-            if any(expected.lower() in key.lower() for key in project_urls.keys()):
-                present_url_types.append(expected)
-
-        score += (len(present_url_types) / len(expected_url_types)) * 10
-        completeness["url_types"] = f"{len(present_url_types)}/{len(expected_url_types)}"
-
-        return {
-            "score": min(score, 100),
-            "completeness": completeness,
-        }
-
-    def _extract_github_url(self, package_data: Dict[str, Any]) -> Optional[str]:
-        """Extract GitHub repository URL from package data."""
-        info = package_data.get("info", {})
-
-        # Check project URLs
-        project_urls = info.get("project_urls", {}) or {}
-        for url in project_urls.values():
-            if url and "github.com" in url:
-                return url
-
-        # Check home page
-        home_page = info.get("home_page", "")
-        if home_page and "github.com" in home_page:
-            return home_page
-
-        return None
-
-    async def _fetch_github_metrics(self, github_url: str) -> Dict[str, Any]:
-        """Fetch GitHub repository metrics."""
-        try:
-            # Parse GitHub URL to get owner/repo
-            parsed = urlparse(github_url)
-            path_parts = parsed.path.strip('/').split('/')
-            if len(path_parts) >= 2:
-                owner, repo = path_parts[0], path_parts[1]
-
-                # GitHub API call (public API, no auth required for basic info)
-                api_url = f"https://api.github.com/repos/{owner}/{repo}"
-
-                async with httpx.AsyncClient(timeout=self.timeout) as client:
-                    response = await client.get(
-                        api_url,
-                        headers={
-                            "Accept": "application/vnd.github.v3+json",
-                            "User-Agent": "PyPI-Health-Scorer/1.0"
-                        }
-                    )
-
-                    if response.status_code == 200:
-                        data = response.json()
-                        return {
-                            "stars": data.get("stargazers_count", 0),
-                            "forks": data.get("forks_count", 0),
-                            "watchers": data.get("watchers_count", 0),
|
|
||||||
"issues": data.get("open_issues_count", 0),
|
|
||||||
"has_wiki": data.get("has_wiki", False),
|
|
||||||
"has_pages": data.get("has_pages", False),
|
|
||||||
"language": data.get("language", ""),
|
|
||||||
"created_at": data.get("created_at", ""),
|
|
||||||
"updated_at": data.get("pushed_at", ""),
|
|
||||||
"default_branch": data.get("default_branch", ""),
|
|
||||||
"archived": data.get("archived", False),
|
|
||||||
"disabled": data.get("disabled", False),
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
logger.warning(f"GitHub API returned status {response.status_code}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Failed to fetch GitHub metrics: {e}")
|
|
||||||
|
|
||||||
        return await self._empty_github_metrics()

    async def _empty_github_metrics(self) -> Dict[str, Any]:
        """Return empty GitHub metrics."""
        return {
            "stars": 0,
            "forks": 0,
            "watchers": 0,
            "issues": 0,
            "has_wiki": False,
            "has_pages": False,
            "language": "",
            "created_at": "",
            "updated_at": "",
            "default_branch": "",
            "archived": False,
            "disabled": False,
            "available": False,
        }

    def _calculate_health_level(self, score: float) -> str:
        """Calculate health level from score."""
        if score >= 85:
            return "excellent"
        elif score >= 70:
            return "good"
        elif score >= 55:
            return "fair"
        elif score >= 40:
            return "poor"
        else:
            return "critical"

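    # Worked example for _calculate_health_level above (illustrative values only):
    # a weighted overall score of 72.0 falls in the 70-85 band and maps to "good",
    # while 38.0 falls below 40 and maps to "critical".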
    def _identify_strengths(self, health_scores: Dict[str, float]) -> List[str]:
        """Identify package strengths."""
        strengths = []
        for category, score in health_scores.items():
            if score >= 80:
                strengths.append(f"Excellent {category} ({score:.0f}/100)")
            elif score >= 65:
                strengths.append(f"Good {category} ({score:.0f}/100)")
        return strengths

    def _identify_weaknesses(self, health_scores: Dict[str, float]) -> List[str]:
        """Identify package weaknesses."""
        weaknesses = []
        for category, score in health_scores.items():
            if score < 40:
                weaknesses.append(f"Poor {category} ({score:.0f}/100)")
            elif score < 55:
                weaknesses.append(f"Fair {category} ({score:.0f}/100)")
        return weaknesses

    def _prioritize_improvements(self, health_scores: Dict[str, float]) -> List[str]:
        """Prioritize improvement areas by weight and score."""
        weighted_gaps = []
        for category, score in health_scores.items():
            gap = 100 - score
            weighted_gap = gap * (self.weights[category] / 100)
            weighted_gaps.append((category, weighted_gap, score))

        # Sort by weighted gap (highest impact first)
        weighted_gaps.sort(key=lambda x: x[1], reverse=True)

        priorities = []
        for category, weighted_gap, score in weighted_gaps[:3]:  # Top 3
            if weighted_gap > 5:  # Only include significant gaps
                priorities.append(f"Improve {category} (current: {score:.0f}/100, impact: {self.weights[category]}%)")

        return priorities

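    # Worked example for _prioritize_improvements above (hypothetical numbers):
    # if self.weights["testing"] were 15, a testing score of 40 gives gap = 60
    # and weighted_gap = 60 * 0.15 = 9.0, which clears the threshold of 5 and is
    # reported as a top improvement priority.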
    def _generate_health_recommendations(
        self, health_scores: Dict[str, float], *assessment_results
    ) -> List[str]:
        """Generate actionable health improvement recommendations."""
        recommendations = []

        overall_score = sum(
            health_scores[category] * (self.weights[category] / 100)
            for category in health_scores
        )

        # Overall recommendations
        if overall_score >= 85:
            recommendations.append("🌟 Excellent package health - maintain current standards")
        elif overall_score >= 70:
            recommendations.append("✅ Good package health - minor improvements possible")
        elif overall_score >= 55:
            recommendations.append("⚠️ Fair package health - several areas need improvement")
        elif overall_score >= 40:
            recommendations.append("🔶 Poor package health - significant improvements needed")
        else:
            recommendations.append("🚨 Critical package health - major overhaul required")

        # Specific recommendations based on low scores
        if health_scores.get("maintenance", 0) < 60:
            recommendations.append("📅 Improve maintenance: Update package more regularly, provide clear version history")

        if health_scores.get("documentation", 0) < 60:
            recommendations.append("📚 Improve documentation: Add comprehensive README, usage examples, and API docs")

        if health_scores.get("testing", 0) < 60:
            recommendations.append("🧪 Add testing: Implement test suite, CI/CD pipeline, and code coverage")

        if health_scores.get("security", 0) < 60:
            recommendations.append("🔒 Enhance security: Add security reporting, use HTTPS, specify dependencies properly")

        if health_scores.get("compatibility", 0) < 60:
            recommendations.append("🔧 Improve compatibility: Support more Python versions, add wheel distribution")

        if health_scores.get("metadata", 0) < 60:
            recommendations.append("📝 Complete metadata: Add missing package information, keywords, and classifiers")

        if health_scores.get("popularity", 0) < 60:
            recommendations.append("📈 Build community: Create documentation site, engage with users, add project URLs")

        return recommendations


# Main health assessment functions
async def assess_pypi_package_health(
    package_name: str,
    version: Optional[str] = None,
    include_github_metrics: bool = True
) -> Dict[str, Any]:
    """
    Assess comprehensive health and quality of a PyPI package.

    Args:
        package_name: Name of the package to assess
        version: Specific version to assess (optional)
        include_github_metrics: Whether to fetch GitHub repository metrics

    Returns:
        Comprehensive health assessment including scores and recommendations
    """
    scorer = PackageHealthScorer()
    return await scorer.assess_package_health(
        package_name, version, include_github_metrics
    )


async def compare_package_health(
    package_names: List[str],
    include_github_metrics: bool = False
) -> Dict[str, Any]:
    """
    Compare health scores across multiple packages.

    Args:
        package_names: List of package names to compare
        include_github_metrics: Whether to include GitHub metrics

    Returns:
        Comparative health analysis with rankings
    """
    logger.info(f"Starting health comparison for {len(package_names)} packages")

    scorer = PackageHealthScorer()
    results = {}

    # Assess packages in parallel batches
    batch_size = 3
    for i in range(0, len(package_names), batch_size):
        batch = package_names[i:i + batch_size]
        batch_tasks = [
            scorer.assess_package_health(pkg_name, include_github_metrics=include_github_metrics)
            for pkg_name in batch
        ]

        batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

        for pkg_name, result in zip(batch, batch_results):
            if isinstance(result, Exception):
                results[pkg_name] = {
                    "error": str(result),
                    "overall_health": {"score": 0, "level": "critical"},
                    "category_scores": {cat: 0 for cat in scorer.weights.keys()}
                }
            else:
                results[pkg_name] = result

    # Create comparison rankings
    package_scores = [
        (pkg, result.get("overall_health", {}).get("score", 0))
        for pkg, result in results.items()
        if "error" not in result
    ]
    package_scores.sort(key=lambda x: x[1], reverse=True)

    # Generate comparison insights
    if package_scores:
        best_package, best_score = package_scores[0]
        worst_package, worst_score = package_scores[-1]
        avg_score = sum(score for _, score in package_scores) / len(package_scores)

        comparison_insights = {
            "best_package": {"name": best_package, "score": best_score},
            "worst_package": {"name": worst_package, "score": worst_score},
            "average_score": round(avg_score, 2),
            "score_range": best_score - worst_score,
            "rankings": [{"package": pkg, "score": score, "rank": i + 1}
                         for i, (pkg, score) in enumerate(package_scores)]
        }
    else:
        comparison_insights = {
            "best_package": None,
            "worst_package": None,
            "average_score": 0,
            "score_range": 0,
            "rankings": []
        }

    return {
        "comparison_timestamp": datetime.now(timezone.utc).isoformat(),
        "packages_compared": len(package_names),
        "detailed_results": results,
        "comparison_insights": comparison_insights,
        "recommendations": _generate_comparison_recommendations(comparison_insights, results)
    }


def _generate_comparison_recommendations(
    insights: Dict[str, Any], results: Dict[str, Any]
) -> List[str]:
    """Generate recommendations for package comparison."""
    recommendations = []

    if not insights.get("rankings"):
        recommendations.append("❌ No successful health assessments to compare")
        return recommendations

    best = insights.get("best_package")
    worst = insights.get("worst_package")
    avg_score = insights.get("average_score", 0)

    if best and worst:
        recommendations.append(
            f"🥇 Best package: {best['name']} (score: {best['score']:.1f}/100)"
        )
        recommendations.append(
            f"🥉 Needs improvement: {worst['name']} (score: {worst['score']:.1f}/100)"
        )

        if best['score'] - worst['score'] > 30:
            recommendations.append("📊 Significant quality variation - consider standardizing practices")

    recommendations.append(f"📈 Average health score: {avg_score:.1f}/100")

    if avg_score >= 70:
        recommendations.append("✅ Overall good package health across portfolio")
    elif avg_score >= 55:
        recommendations.append("⚠️ Mixed package health - focus on improving lower-scoring packages")
    else:
        recommendations.append("🚨 Poor overall package health - systematic improvements needed")

    return recommendations
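
# Minimal usage sketch (not part of the original module): assumes PyPI is
# reachable and that the module is executed in package context (e.g. with
# `python -m`); "requests" and "httpx" are example package names only.
if __name__ == "__main__":
    report = asyncio.run(assess_pypi_package_health("requests", include_github_metrics=False))
    print(report.get("overall_health"))

    comparison = asyncio.run(compare_package_health(["requests", "httpx"]))
    print(comparison["comparison_insights"]["rankings"])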
@@ -1,155 +0,0 @@
"""Package health assessment tools for PyPI packages."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..tools.health_scorer import assess_pypi_package_health, compare_package_health
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def assess_package_health_score(
|
|
||||||
package_name: str,
|
|
||||||
version: Optional[str] = None,
|
|
||||||
include_github_metrics: bool = True
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Assess comprehensive health and quality of a PyPI package.
|
|
||||||
|
|
||||||
This tool evaluates package health across multiple dimensions including maintenance,
|
|
||||||
popularity, documentation, testing, security practices, compatibility, and metadata
|
|
||||||
completeness to provide an overall health score and actionable recommendations.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_name: Name of the package to assess for health and quality
|
|
||||||
version: Specific version to assess (optional, defaults to latest version)
|
|
||||||
include_github_metrics: Whether to fetch GitHub repository metrics for analysis
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comprehensive health assessment including:
|
|
||||||
- Overall health score (0-100) and level (excellent/good/fair/poor/critical)
|
|
||||||
- Category-specific scores (maintenance, popularity, documentation, testing, etc.)
|
|
||||||
- Detailed assessment breakdown with indicators and issues for each category
|
|
||||||
- GitHub repository metrics (stars, forks, activity) if available
|
|
||||||
- Actionable recommendations for health improvements
|
|
||||||
- Strengths, weaknesses, and improvement priorities analysis
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
InvalidPackageNameError: If package name is empty or invalid
|
|
||||||
PackageNotFoundError: If package is not found on PyPI
|
|
||||||
NetworkError: For network-related errors
|
|
||||||
SearchError: If health assessment fails
|
|
||||||
"""
|
|
||||||
if not package_name or not package_name.strip():
|
|
||||||
raise InvalidPackageNameError(package_name)
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Assessing health for package {package_name}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await assess_pypi_package_health(
|
|
||||||
package_name=package_name,
|
|
||||||
version=version,
|
|
||||||
include_github_metrics=include_github_metrics
|
|
||||||
)
|
|
||||||
|
|
||||||
overall_score = result.get("overall_health", {}).get("score", 0)
|
|
||||||
health_level = result.get("overall_health", {}).get("level", "unknown")
|
|
||||||
logger.info(f"MCP tool: Health assessment completed for {package_name} - score: {overall_score:.1f}/100 ({health_level})")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except (InvalidPackageNameError, NetworkError, SearchError) as e:
|
|
||||||
logger.error(f"Error assessing health for {package_name}: {e}")
|
|
||||||
return {
|
|
||||||
"error": f"Health assessment failed: {e}",
|
|
||||||
"error_type": type(e).__name__,
|
|
||||||
"package": package_name,
|
|
||||||
"version": version,
|
|
||||||
"assessment_timestamp": "",
|
|
||||||
"overall_health": {
|
|
||||||
"score": 0,
|
|
||||||
"level": "critical",
|
|
||||||
"max_score": 100,
|
|
||||||
},
|
|
||||||
"category_scores": {
|
|
||||||
"maintenance": 0,
|
|
||||||
"popularity": 0,
|
|
||||||
"documentation": 0,
|
|
||||||
"testing": 0,
|
|
||||||
"security": 0,
|
|
||||||
"compatibility": 0,
|
|
||||||
"metadata": 0,
|
|
||||||
},
|
|
||||||
"detailed_assessment": {},
|
|
||||||
"recommendations": [f"❌ Health assessment failed: {e}"],
|
|
||||||
"health_summary": {
|
|
||||||
"strengths": [],
|
|
||||||
"weaknesses": ["Assessment failure"],
|
|
||||||
"improvement_priority": ["Resolve package access issues"],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def compare_packages_health_scores(
|
|
||||||
package_names: List[str],
|
|
||||||
include_github_metrics: bool = False
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Compare health scores across multiple PyPI packages.
|
|
||||||
|
|
||||||
This tool performs comparative health analysis across multiple packages,
|
|
||||||
providing rankings, insights, and recommendations to help evaluate
|
|
||||||
package ecosystem quality and identify the best options.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_names: List of package names to compare for health and quality
|
|
||||||
include_github_metrics: Whether to include GitHub metrics in the comparison
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comparative health analysis including:
|
|
||||||
- Detailed health results for each package
|
|
||||||
- Health score rankings with best/worst package identification
|
|
||||||
- Comparison insights (average scores, score ranges, rankings)
|
|
||||||
- Recommendations for package selection and improvements
|
|
||||||
- Statistical analysis of health across the package set
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If package_names list is empty
|
|
||||||
NetworkError: For network-related errors during analysis
|
|
||||||
SearchError: If health comparison fails
|
|
||||||
"""
|
|
||||||
if not package_names:
|
|
||||||
raise ValueError("Package names list cannot be empty")
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Starting health comparison for {len(package_names)} packages")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await compare_package_health(
|
|
||||||
package_names=package_names,
|
|
||||||
include_github_metrics=include_github_metrics
|
|
||||||
)
|
|
||||||
|
|
||||||
comparison_insights = result.get("comparison_insights", {})
|
|
||||||
best_package = comparison_insights.get("best_package", {})
|
|
||||||
packages_compared = result.get("packages_compared", 0)
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Health comparison completed for {packages_compared} packages - best: {best_package.get('name', 'unknown')} ({best_package.get('score', 0):.1f}/100)")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except (ValueError, NetworkError, SearchError) as e:
|
|
||||||
logger.error(f"Error in health comparison: {e}")
|
|
||||||
return {
|
|
||||||
"error": f"Health comparison failed: {e}",
|
|
||||||
"error_type": type(e).__name__,
|
|
||||||
"comparison_timestamp": "",
|
|
||||||
"packages_compared": len(package_names),
|
|
||||||
"detailed_results": {},
|
|
||||||
"comparison_insights": {
|
|
||||||
"best_package": None,
|
|
||||||
"worst_package": None,
|
|
||||||
"average_score": 0,
|
|
||||||
"score_range": 0,
|
|
||||||
"rankings": []
|
|
||||||
},
|
|
||||||
"recommendations": [f"❌ Health comparison failed: {e}"]
|
|
||||||
}
|
|
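
# Minimal invocation sketch (not part of the original file): assumes the module
# is run in package context (e.g. `python -m`) with network access to PyPI;
# package names are examples only.
if __name__ == "__main__":
    import asyncio

    scores = asyncio.run(compare_packages_health_scores(["requests", "httpx"]))
    print(scores.get("comparison_insights", {}).get("rankings"))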
@@ -1,727 +0,0 @@
"""License compatibility analysis tools for PyPI packages."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..core.pypi_client import PyPIClient
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class LicenseCompatibilityAnalyzer:
|
|
||||||
"""Comprehensive license compatibility analyzer for PyPI packages."""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.timeout = 30.0
|
|
||||||
|
|
||||||
# License compatibility matrix based on common license interactions
|
|
||||||
# Key: primary license, Value: dict of compatible licenses with compatibility level
|
|
||||||
self.compatibility_matrix = {
|
|
||||||
"MIT": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "one-way", # MIT can be used in GPL, not vice versa
|
|
||||||
"GPL-3.0": "one-way",
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
"BSD": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "one-way",
|
|
||||||
"GPL-3.0": "one-way",
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
"Apache-2.0": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "incompatible", # Patent clause conflicts
|
|
||||||
"GPL-3.0": "one-way", # Apache can go into GPL-3.0
|
|
||||||
"LGPL-2.1": "review-required",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
"GPL-2.0": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "incompatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "compatible",
|
|
||||||
"GPL-3.0": "incompatible", # GPL-2.0 and GPL-3.0 are incompatible
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "incompatible",
|
|
||||||
"MPL-2.0": "incompatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "incompatible",
|
|
||||||
},
|
|
||||||
"GPL-3.0": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "incompatible",
|
|
||||||
"GPL-3.0": "compatible",
|
|
||||||
"LGPL-2.1": "review-required",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "incompatible",
|
|
||||||
},
|
|
||||||
"LGPL-2.1": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "review-required",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "compatible",
|
|
||||||
"GPL-3.0": "review-required",
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
"LGPL-3.0": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "incompatible",
|
|
||||||
"GPL-3.0": "compatible",
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
"MPL-2.0": {
|
|
||||||
"MIT": "compatible",
|
|
||||||
"BSD": "compatible",
|
|
||||||
"Apache-2.0": "compatible",
|
|
||||||
"ISC": "compatible",
|
|
||||||
"GPL-2.0": "incompatible",
|
|
||||||
"GPL-3.0": "compatible",
|
|
||||||
"LGPL-2.1": "compatible",
|
|
||||||
"LGPL-3.0": "compatible",
|
|
||||||
"MPL-2.0": "compatible",
|
|
||||||
"Unlicense": "compatible",
|
|
||||||
"Public Domain": "compatible",
|
|
||||||
"Proprietary": "review-required",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
# License categorization for easier analysis
|
|
||||||
self.license_categories = {
|
|
||||||
"permissive": ["MIT", "BSD", "Apache-2.0", "ISC", "Unlicense", "Public Domain"],
|
|
||||||
"copyleft_weak": ["LGPL-2.1", "LGPL-3.0", "MPL-2.0"],
|
|
||||||
"copyleft_strong": ["GPL-2.0", "GPL-3.0", "AGPL-3.0"],
|
|
||||||
"proprietary": ["Proprietary", "Commercial", "All Rights Reserved"],
|
|
||||||
"unknown": ["Unknown", "Other", "Custom"],
|
|
||||||
}
|
|
||||||
|
|
||||||
# Common license normalization patterns
|
|
||||||
self.license_patterns = {
|
|
||||||
r"MIT\s*License": "MIT",
|
|
||||||
r"BSD\s*3[-\s]*Clause": "BSD",
|
|
||||||
r"BSD\s*2[-\s]*Clause": "BSD",
|
|
||||||
r"Apache\s*2\.0": "Apache-2.0",
|
|
||||||
r"Apache\s*License\s*2\.0": "Apache-2.0",
|
|
||||||
r"GNU\s*General\s*Public\s*License\s*v?2": "GPL-2.0",
|
|
||||||
r"GNU\s*General\s*Public\s*License\s*v?3": "GPL-3.0",
|
|
||||||
r"GNU\s*Lesser\s*General\s*Public\s*License\s*v?2": "LGPL-2.1",
|
|
||||||
r"GNU\s*Lesser\s*General\s*Public\s*License\s*v?3": "LGPL-3.0",
|
|
||||||
r"Mozilla\s*Public\s*License\s*2\.0": "MPL-2.0",
|
|
||||||
r"ISC\s*License": "ISC",
|
|
||||||
r"Unlicense": "Unlicense",
|
|
||||||
r"Public\s*Domain": "Public Domain",
|
|
||||||
}
|
|
||||||
|
|
||||||
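    # Example readings of the matrix above:
    #   self.compatibility_matrix["Apache-2.0"]["GPL-2.0"] -> "incompatible" (patent clause)
    #   self.compatibility_matrix["MIT"]["GPL-3.0"]        -> "one-way" (MIT may flow into GPL-3.0)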
    async def analyze_package_license(
        self,
        package_name: str,
        version: Optional[str] = None,
        include_dependencies: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze license information for a PyPI package.

        Args:
            package_name: Name of the package to analyze
            version: Specific version to analyze (optional)
            include_dependencies: Whether to analyze dependency licenses

        Returns:
            Dictionary containing license analysis results
        """
        logger.info(f"Starting license analysis for package: {package_name}")

        try:
            async with PyPIClient() as client:
                package_data = await client.get_package_info(package_name, version)

                package_version = version or package_data["info"]["version"]

                # Analyze package license
                license_info = self._extract_license_info(package_data)

                # Analyze dependencies if requested
                dependency_licenses = []
                if include_dependencies:
                    dependency_licenses = await self._analyze_dependency_licenses(
                        package_name, package_version
                    )

                # Generate compatibility analysis
                compatibility_analysis = self._analyze_license_compatibility(
                    license_info, dependency_licenses
                )

                # Calculate risk assessment
                risk_assessment = self._assess_license_risks(
                    license_info, dependency_licenses, compatibility_analysis
                )

                return {
                    "package": package_name,
                    "version": package_version,
                    "analysis_timestamp": datetime.now(timezone.utc).isoformat(),
                    "license_info": license_info,
                    "dependency_licenses": dependency_licenses,
                    "compatibility_analysis": compatibility_analysis,
                    "risk_assessment": risk_assessment,
                    "recommendations": self._generate_license_recommendations(
                        license_info, dependency_licenses, compatibility_analysis, risk_assessment
                    ),
                    "analysis_summary": {
                        "total_dependencies_analyzed": len(dependency_licenses),
                        "unique_licenses_found": len(set(
                            [license_info.get("normalized_license", "Unknown")] +
                            [dep.get("normalized_license", "Unknown") for dep in dependency_licenses]
                        )),
                        "license_conflicts": len(compatibility_analysis.get("conflicts", [])),
                        "review_required_count": len(compatibility_analysis.get("review_required", [])),
                    }
                }

        except Exception as e:
            logger.error(f"License analysis failed for {package_name}: {e}")
            raise SearchError(f"License analysis failed: {e}") from e

    def _extract_license_info(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract and normalize license information from package data."""
        info = package_data.get("info", {})

        # Extract license from multiple sources
        license_field = info.get("license", "")
        license_classifier = self._extract_license_from_classifiers(
            info.get("classifiers", [])
        )

        # Normalize license
        normalized_license = self._normalize_license(license_field or license_classifier)

        # Categorize license
        license_category = self._categorize_license(normalized_license)

        return {
            "raw_license": license_field,
            "classifier_license": license_classifier,
            "normalized_license": normalized_license,
            "license_category": license_category,
            "license_url": self._extract_license_url(info),
            "license_confidence": self._assess_license_confidence(
                license_field, license_classifier, normalized_license
            ),
        }

    def _extract_license_from_classifiers(self, classifiers: List[str]) -> str:
        """Extract license information from PyPI classifiers."""
        license_classifiers = [
            c for c in classifiers if c.startswith("License ::")
        ]

        if not license_classifiers:
            return ""

        # Return the most specific license classifier
        return license_classifiers[-1].replace("License ::", "").strip()

    def _normalize_license(self, license_text: str) -> str:
        """Normalize license text to standard SPDX identifiers."""
        if not license_text:
            return "Unknown"

        license_text_clean = license_text.strip()

        # Check for exact matches first
        common_licenses = {
            "MIT": "MIT",
            "BSD": "BSD",
            "Apache": "Apache-2.0",
            "GPL": "GPL-3.0",  # Default to GPL-3.0 if version unspecified
            "LGPL": "LGPL-3.0",
            "MPL": "MPL-2.0",
        }

        if license_text_clean in common_licenses:
            return common_licenses[license_text_clean]

        # Pattern matching
        for pattern, normalized in self.license_patterns.items():
            if re.search(pattern, license_text_clean, re.IGNORECASE):
                return normalized

        # Check if it contains known license names
        license_lower = license_text_clean.lower()
        if "mit" in license_lower:
            return "MIT"
        elif "bsd" in license_lower:
            return "BSD"
        elif "apache" in license_lower:
            return "Apache-2.0"
        elif "gpl" in license_lower and "lgpl" not in license_lower:
            return "GPL-3.0"
        elif "lgpl" in license_lower:
            return "LGPL-3.0"
        elif "mozilla" in license_lower or "mpl" in license_lower:
            return "MPL-2.0"
        elif "unlicense" in license_lower:
            return "Unlicense"
        elif "public domain" in license_lower:
            return "Public Domain"
        elif any(prop in license_lower for prop in ["proprietary", "commercial", "all rights reserved"]):
            return "Proprietary"

        return "Other"

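    # Example normalizations via _normalize_license above: "MIT License" -> "MIT",
    # "Apache License 2.0" -> "Apache-2.0", "GNU General Public License v3" -> "GPL-3.0";
    # unrecognized text (e.g. "WTFPL") falls through to "Other".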
    def _categorize_license(self, normalized_license: str) -> str:
        """Categorize license into major categories."""
        for category, licenses in self.license_categories.items():
            if normalized_license in licenses:
                return category
        return "unknown"

    def _extract_license_url(self, info: Dict[str, Any]) -> str:
        """Extract license URL from package info."""
        # Check project URLs
        project_urls = info.get("project_urls", {}) or {}
        for key, url in project_urls.items():
            if "license" in key.lower():
                return url

        # Check home page for license info
        home_page = info.get("home_page", "")
        if home_page and "github.com" in home_page:
            return f"{home_page.rstrip('/')}/blob/main/LICENSE"

        return ""

    def _assess_license_confidence(
        self, raw_license: str, classifier_license: str, normalized_license: str
    ) -> str:
        """Assess confidence level in license detection."""
        if not raw_license and not classifier_license:
            return "low"

        if normalized_license in ("Unknown", "Other"):
            return "low"

        if raw_license and classifier_license and raw_license in classifier_license:
            return "high"
        elif raw_license or classifier_license:
            return "medium"
        else:
            return "low"

    async def _analyze_dependency_licenses(
        self, package_name: str, version: str
    ) -> List[Dict[str, Any]]:
        """Analyze licenses of package dependencies."""
        try:
            async with PyPIClient() as client:
                package_data = await client.get_package_info(package_name, version)

                # Extract dependencies
                requires_dist = package_data.get("info", {}).get("requires_dist", []) or []
                dependencies = []

                for req in requires_dist:
                    # Parse the dependency name: take the leading project-name token,
                    # dropping extras, version specifiers, and environment markers
                    match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", req.strip())
                    dep_name = match.group(0) if match else ""
                    if dep_name and not dep_name.startswith("extra"):
                        dependencies.append(dep_name)

                # Analyze dependency licenses (limit to top 15 to avoid overwhelming)
                dependency_licenses = []

                for dep_name in dependencies[:15]:
                    try:
                        dep_data = await client.get_package_info(dep_name)
                        dep_license_info = self._extract_license_info(dep_data)

                        dependency_licenses.append({
                            "package": dep_name,
                            "version": dep_data.get("info", {}).get("version", ""),
                            **dep_license_info
                        })
                    except Exception as e:
                        logger.debug(f"Failed to analyze license for dependency {dep_name}: {e}")
                        dependency_licenses.append({
                            "package": dep_name,
                            "version": "",
                            "normalized_license": "Unknown",
                            "license_category": "unknown",
                            "license_confidence": "low",
                            "error": str(e)
                        })

                return dependency_licenses

        except Exception as e:
            logger.warning(f"Dependency license analysis failed: {e}")
            return []

    def _analyze_license_compatibility(
        self, package_license: Dict[str, Any], dependency_licenses: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Analyze license compatibility between package and its dependencies."""
        main_license = package_license.get("normalized_license", "Unknown")

        compatible = []
        incompatible = []
        review_required = []
        one_way = []
        unknown = []

        for dep in dependency_licenses:
            dep_license = dep.get("normalized_license", "Unknown")
            dep_package = dep.get("package", "unknown")

            if main_license == "Unknown" or dep_license == "Unknown":
                unknown.append({
                    "package": dep_package,
                    "license": dep_license,
                    "reason": "License information unavailable"
                })
                continue

            compatibility = self._check_license_compatibility(main_license, dep_license)

            if compatibility == "compatible":
                compatible.append({
                    "package": dep_package,
                    "license": dep_license,
                })
            elif compatibility == "incompatible":
                incompatible.append({
                    "package": dep_package,
                    "license": dep_license,
                    "reason": f"{main_license} and {dep_license} are incompatible"
                })
            elif compatibility == "review-required":
                review_required.append({
                    "package": dep_package,
                    "license": dep_license,
                    "reason": f"Manual review needed for {main_license} + {dep_license}"
                })
            elif compatibility == "one-way":
                one_way.append({
                    "package": dep_package,
                    "license": dep_license,
                    "reason": f"{dep_license} can be used in {main_license} project"
                })
            else:
                # Pairs without a matrix entry resolve to "unknown"; record them
                # instead of silently dropping the dependency from the report
                unknown.append({
                    "package": dep_package,
                    "license": dep_license,
                    "reason": "No compatibility rule for this license pair"
                })

        return {
            "main_license": main_license,
            "compatible": compatible,
            "incompatible": incompatible,
            "review_required": review_required,
            "one_way": one_way,
            "unknown": unknown,
            "conflicts": incompatible,  # Alias for easier access
        }

    def _check_license_compatibility(self, license1: str, license2: str) -> str:
        """Check compatibility between two licenses."""
        if license1 in self.compatibility_matrix:
            return self.compatibility_matrix[license1].get(license2, "unknown")

        # Fallback compatibility rules
        if license1 == license2:
            return "compatible"

        # Default to review required for unknown combinations
        return "review-required"

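    # Example lookups via _check_license_compatibility above:
    #   ("MIT", "GPL-3.0")        -> "one-way"
    #   ("GPL-2.0", "Apache-2.0") -> "incompatible"
    #   ("ISC", "MIT")            -> "review-required" (ISC has no matrix row; non-equal fallback)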
    def _assess_license_risks(
        self,
        package_license: Dict[str, Any],
        dependency_licenses: List[Dict[str, Any]],
        compatibility_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Assess overall license risks for the project."""
        risks = []
        risk_score = 0

        main_license = package_license.get("normalized_license", "Unknown")
        main_category = package_license.get("license_category", "unknown")

        # Check for incompatible licenses
        incompatible_count = len(compatibility_analysis.get("incompatible", []))
        if incompatible_count > 0:
            risks.append(f"Found {incompatible_count} incompatible license(s)")
            risk_score += incompatible_count * 30

        # Check for unknown licenses
        unknown_count = len(compatibility_analysis.get("unknown", []))
        if unknown_count > 0:
            risks.append(f"Found {unknown_count} dependency(ies) with unknown licenses")
            risk_score += unknown_count * 10

        # Check for review-required licenses
        review_count = len(compatibility_analysis.get("review_required", []))
        if review_count > 0:
            risks.append(f"Found {review_count} license(s) requiring manual review")
            risk_score += review_count * 15

        # Check for copyleft contamination risk
        if main_category == "permissive":
            copyleft_deps = [
                dep for dep in dependency_licenses
                if dep.get("license_category") in ["copyleft_weak", "copyleft_strong"]
            ]
            if copyleft_deps:
                risks.append(f"Permissive project using {len(copyleft_deps)} copyleft dependencies")
                risk_score += len(copyleft_deps) * 20

        # Check for proprietary license risks
        proprietary_deps = [
            dep for dep in dependency_licenses
            if dep.get("license_category") == "proprietary"
        ]
        if proprietary_deps:
            risks.append(f"Found {len(proprietary_deps)} proprietary dependencies")
            risk_score += len(proprietary_deps) * 25

        # Calculate risk level
        if risk_score >= 80:
            risk_level = "critical"
        elif risk_score >= 50:
            risk_level = "high"
        elif risk_score >= 25:
            risk_level = "medium"
        elif risk_score > 0:
            risk_level = "low"
        else:
            risk_level = "minimal"

        return {
            "risk_score": min(risk_score, 100),
            "risk_level": risk_level,
            "risk_factors": risks,
            "compliance_status": "compliant" if risk_score < 25 else "review-needed",
        }

    def _generate_license_recommendations(
        self,
        package_license: Dict[str, Any],
        dependency_licenses: List[Dict[str, Any]],
        compatibility_analysis: Dict[str, Any],
        risk_assessment: Dict[str, Any]
    ) -> List[str]:
        """Generate actionable license recommendations."""
        recommendations = []

        main_license = package_license.get("normalized_license", "Unknown")
        risk_level = risk_assessment.get("risk_level", "unknown")

        # High-level recommendations based on risk
        if risk_level == "critical":
            recommendations.append("🚨 Critical license issues detected - immediate legal review required")
        elif risk_level == "high":
            recommendations.append("⚠️ High license risk - review and resolve conflicts before release")
        elif risk_level == "medium":
            recommendations.append("⚠️ Moderate license risk - review recommendations below")
        elif risk_level == "minimal":
            recommendations.append("✅ License compatibility appears good")

        # Specific recommendations for incompatible licenses
        incompatible = compatibility_analysis.get("incompatible", [])
        if incompatible:
            recommendations.append(f"🔴 Remove or replace {len(incompatible)} incompatible dependencies:")
            for dep in incompatible[:3]:  # Show first 3
                recommendations.append(f"  - {dep['package']} ({dep['license']}): {dep.get('reason', '')}")

        # Recommendations for review-required licenses
        review_required = compatibility_analysis.get("review_required", [])
        if review_required:
            recommendations.append(f"📋 Manual review needed for {len(review_required)} dependencies:")
            for dep in review_required[:3]:
                recommendations.append(f"  - {dep['package']} ({dep['license']})")

        # Unknown license recommendations
        unknown = compatibility_analysis.get("unknown", [])
        if unknown:
            recommendations.append(f"❓ Investigate {len(unknown)} dependencies with unknown licenses")

        # License confidence recommendations
        if package_license.get("license_confidence") == "low":
            recommendations.append("📝 Consider adding clear license information to your package")

        # Category-specific recommendations
        main_category = package_license.get("license_category", "unknown")
        if main_category == "copyleft_strong":
            recommendations.append("ℹ️ GPL license requires derivative works to also be GPL")
        elif main_category == "permissive":
            recommendations.append("ℹ️ Permissive license allows flexible usage")

        return recommendations


# Main analysis functions
async def analyze_package_license_compatibility(
    package_name: str,
    version: Optional[str] = None,
    include_dependencies: bool = True
) -> Dict[str, Any]:
    """
    Analyze license compatibility for a PyPI package.

    Args:
        package_name: Name of the package to analyze
        version: Specific version to analyze (optional)
        include_dependencies: Whether to analyze dependency licenses

    Returns:
        Comprehensive license compatibility analysis
    """
    analyzer = LicenseCompatibilityAnalyzer()
    return await analyzer.analyze_package_license(
        package_name, version, include_dependencies
    )


async def check_license_compliance_bulk(
    package_names: List[str],
    target_license: Optional[str] = None
) -> Dict[str, Any]:
    """
    Check license compliance for multiple packages.

    Args:
        package_names: List of package names to check
        target_license: Target license for compatibility checking

    Returns:
        Bulk license compliance report
    """
    logger.info(f"Starting bulk license compliance check for {len(package_names)} packages")

    analyzer = LicenseCompatibilityAnalyzer()
    results = {}
    summary = {
        "total_packages": len(package_names),
        "compliant_packages": 0,
        "non_compliant_packages": 0,
        "unknown_license_packages": 0,
        "high_risk_packages": [],
        "analysis_timestamp": datetime.now(timezone.utc).isoformat()
    }

    # Analyze packages in parallel batches
    batch_size = 5
    for i in range(0, len(package_names), batch_size):
        batch = package_names[i:i + batch_size]
        batch_tasks = [
            analyzer.analyze_package_license(pkg_name, include_dependencies=False)
            for pkg_name in batch
        ]

        batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

        for pkg_name, result in zip(batch, batch_results):
            if isinstance(result, Exception):
                results[pkg_name] = {
                    "error": str(result),
                    "analysis_status": "failed"
                }
                summary["unknown_license_packages"] += 1
            else:
                results[pkg_name] = result

                # Update summary
                risk_level = result.get("risk_assessment", {}).get("risk_level", "unknown")
                if risk_level in ["minimal", "low"]:
                    summary["compliant_packages"] += 1
                else:
                    summary["non_compliant_packages"] += 1

                if risk_level in ["high", "critical"]:
                    summary["high_risk_packages"].append({
                        "package": pkg_name,
                        "license": result.get("license_info", {}).get("normalized_license", "Unknown"),
                        "risk_level": risk_level
                    })

    return {
        "summary": summary,
        "detailed_results": results,
        "target_license": target_license,
        "recommendations": _generate_bulk_license_recommendations(summary, results)
    }


def _generate_bulk_license_recommendations(summary: Dict[str, Any], results: Dict[str, Any]) -> List[str]:
    """Generate recommendations for bulk license analysis."""
    recommendations = []

    compliant = summary["compliant_packages"]
    total = summary["total_packages"]

    if compliant == total:
        recommendations.append("✅ All packages appear to have compliant licenses")
    else:
        non_compliant = summary["non_compliant_packages"]
        percentage = (non_compliant / total) * 100
        recommendations.append(
            f"⚠️ {non_compliant}/{total} packages ({percentage:.1f}%) have license compliance issues"
        )

    high_risk = summary["high_risk_packages"]
    if high_risk:
        recommendations.append(
            f"🚨 {len(high_risk)} packages are high risk: {', '.join([p['package'] for p in high_risk])}"
        )
        recommendations.append("Priority: Address high-risk packages immediately")

    unknown = summary["unknown_license_packages"]
    if unknown > 0:
        recommendations.append(f"❓ {unknown} packages have unknown or unclear licenses")
        recommendations.append("Consider investigating these packages for license clarity")

    return recommendations
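
# Minimal usage sketch (not part of the original module): assumes PyPI is
# reachable and package-context execution (e.g. `python -m`); "requests" is an
# example package name only.
if __name__ == "__main__":
    report = asyncio.run(
        analyze_package_license_compatibility("requests", include_dependencies=False)
    )
    print(report["license_info"]["normalized_license"], report["risk_assessment"]["risk_level"])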
@@ -1,154 +0,0 @@
"""License compatibility analysis tools for PyPI packages."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..tools.license_analyzer import analyze_package_license_compatibility, check_license_compliance_bulk
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def analyze_pypi_package_license(
|
|
||||||
package_name: str,
|
|
||||||
version: Optional[str] = None,
|
|
||||||
include_dependencies: bool = True
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Analyze license compatibility for a PyPI package.
|
|
||||||
|
|
||||||
This tool provides comprehensive license analysis including license identification,
|
|
||||||
dependency license scanning, compatibility checking, and risk assessment to help
|
|
||||||
ensure your project complies with open source license requirements.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_name: Name of the package to analyze for license compatibility
|
|
||||||
version: Specific version to analyze (optional, defaults to latest version)
|
|
||||||
include_dependencies: Whether to analyze dependency licenses for compatibility
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comprehensive license analysis including:
|
|
||||||
- License identification and normalization (SPDX format)
|
|
||||||
- License categorization (permissive, copyleft, proprietary, etc.)
|
|
||||||
- Dependency license analysis and compatibility matrix
|
|
||||||
- Risk assessment with score and risk level (minimal, low, medium, high, critical)
|
|
||||||
        - Compatibility analysis highlighting conflicts and review-required combinations
        - Actionable recommendations for license compliance

    Raises:
        InvalidPackageNameError: If package name is empty or invalid
        PackageNotFoundError: If package is not found on PyPI
        NetworkError: For network-related errors
        SearchError: If license analysis fails
    """
    if not package_name or not package_name.strip():
        raise InvalidPackageNameError(package_name)

    logger.info(f"MCP tool: Analyzing license compatibility for package {package_name}")

    try:
        result = await analyze_package_license_compatibility(
            package_name=package_name,
            version=version,
            include_dependencies=include_dependencies
        )

        logger.info(
            f"MCP tool: License analysis completed for {package_name} - "
            f"{result.get('analysis_summary', {}).get('license_conflicts', 0)} conflicts found"
        )
        return result

    except (InvalidPackageNameError, NetworkError, SearchError) as e:
        logger.error(f"Error analyzing license for {package_name}: {e}")
        return {
            "error": f"License analysis failed: {e}",
            "error_type": type(e).__name__,
            "package": package_name,
            "version": version,
            "analysis_timestamp": "",
            "license_info": {
                "normalized_license": "Unknown",
                "license_category": "unknown",
                "license_confidence": "low",
            },
            "dependency_licenses": [],
            "compatibility_analysis": {
                "main_license": "Unknown",
                "compatible": [],
                "incompatible": [],
                "review_required": [],
                "conflicts": [],
            },
            "risk_assessment": {
                "risk_score": 100,
                "risk_level": "critical",
                "risk_factors": [f"License analysis failed: {e}"],
                "compliance_status": "unknown",
            },
            "recommendations": [f"❌ License analysis failed: {e}"],
            "analysis_summary": {
                "total_dependencies_analyzed": 0,
                "unique_licenses_found": 0,
                "license_conflicts": 0,
                "review_required_count": 0,
            }
        }


async def check_bulk_license_compliance(
    package_names: List[str],
    target_license: Optional[str] = None
) -> Dict[str, Any]:
    """
    Check license compliance for multiple PyPI packages.

    This tool performs bulk license compliance checking across multiple packages,
    providing a consolidated report that helps ensure your entire package ecosystem
    complies with license requirements, and identifies potential legal risks.

    Args:
        package_names: List of package names to check for license compliance
        target_license: Target license for compatibility checking (optional)

    Returns:
        Dictionary containing bulk compliance analysis including:
        - Summary statistics (total packages, compliant/non-compliant counts)
        - Detailed license analysis for each package
        - High-risk packages requiring immediate attention
        - Unknown-license packages needing investigation
        - Prioritized recommendations for compliance remediation

    Raises:
        ValueError: If package_names list is empty
        NetworkError: For network-related errors during analysis
        SearchError: If bulk compliance checking fails
    """
    if not package_names:
        raise ValueError("Package names list cannot be empty")

    logger.info(f"MCP tool: Starting bulk license compliance check for {len(package_names)} packages")

    try:
        result = await check_license_compliance_bulk(
            package_names=package_names,
            target_license=target_license
        )

        logger.info(
            f"MCP tool: Bulk license compliance completed - "
            f"{result.get('summary', {}).get('non_compliant_packages', 0)} non-compliant packages found"
        )
        return result

    except (ValueError, NetworkError, SearchError) as e:
        logger.error(f"Error in bulk license compliance check: {e}")
        return {
            "error": f"Bulk license compliance check failed: {e}",
            "error_type": type(e).__name__,
            "summary": {
                "total_packages": len(package_names),
                "compliant_packages": 0,
                "non_compliant_packages": 0,
                "unknown_license_packages": len(package_names),
                "high_risk_packages": [],
                "analysis_timestamp": ""
            },
            "detailed_results": {},
            "target_license": target_license,
            "recommendations": [f"❌ Bulk license compliance check failed: {e}"]
        }
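
# Usage sketch (illustrative only): a minimal asyncio entry point for the bulk
# compliance tool above. The package names and target license are arbitrary
# examples, not part of the tool's contract.
import asyncio

async def _compliance_demo() -> None:
    report = await check_bulk_license_compliance(
        package_names=["requests", "httpx"],
        target_license="MIT",
    )
    # "summary" carries the compliant/non-compliant counts described in the
    # docstring; an "error" key is present only when the check failed.
    print(report.get("summary", {}))

if __name__ == "__main__":
    asyncio.run(_compliance_demo())
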
@ -1,947 +0,0 @@
"""Requirements file parsing and analysis tools for Python projects."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import tomllib
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..core.pypi_client import PyPIClient
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class RequirementsAnalyzer:
|
|
||||||
"""Comprehensive requirements file analyzer for Python projects."""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.timeout = 30.0
|
|
||||||
|
|
||||||
# Supported requirement file patterns
|
|
||||||
self.requirement_patterns = {
|
|
||||||
"requirements.txt": r"requirements.*\.txt",
|
|
||||||
"pyproject.toml": r"pyproject\.toml",
|
|
||||||
"setup.py": r"setup\.py",
|
|
||||||
"Pipfile": r"Pipfile",
|
|
||||||
"poetry.lock": r"poetry\.lock",
|
|
||||||
"conda.yml": r"(conda|environment)\.ya?ml",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Version specifier patterns
|
|
||||||
self.version_patterns = {
|
|
||||||
"exact": r"==\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"gte": r">=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"gt": r">\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"lte": r"<=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"lt": r"<\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"compatible": r"~=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
"not_equal": r"!=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
|
|
||||||
}
|
|
||||||
|
|
||||||
async def analyze_requirements_file(
|
|
||||||
self,
|
|
||||||
file_path: str,
|
|
||||||
check_updates: bool = True,
|
|
||||||
security_scan: bool = True,
|
|
||||||
compatibility_check: bool = True
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Analyze a requirements file for dependencies, versions, security, and compatibility.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: Path to the requirements file
|
|
||||||
check_updates: Whether to check for package updates
|
|
||||||
security_scan: Whether to perform security vulnerability scanning
|
|
||||||
compatibility_check: Whether to check Python version compatibility
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comprehensive requirements analysis
|
|
||||||
"""
|
|
||||||
logger.info(f"Starting requirements analysis for: {file_path}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Parse requirements file
|
|
||||||
parsed_requirements = await self._parse_requirements_file(file_path)
|
|
||||||
|
|
||||||
if not parsed_requirements["dependencies"]:
|
|
||||||
return {
|
|
||||||
"file_path": file_path,
|
|
||||||
"analysis_timestamp": datetime.now(timezone.utc).isoformat(),
|
|
||||||
"file_info": parsed_requirements["file_info"],
|
|
||||||
"dependencies": [],
|
|
||||||
"analysis_summary": {
|
|
||||||
"total_dependencies": 0,
|
|
||||||
"outdated_packages": 0,
|
|
||||||
"security_vulnerabilities": 0,
|
|
||||||
"compatibility_issues": 0,
|
|
||||||
},
|
|
||||||
"recommendations": ["No dependencies found to analyze"],
|
|
||||||
"error": "No dependencies found in requirements file"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Analyze dependencies in parallel
|
|
||||||
analysis_tasks = []
|
|
||||||
|
|
||||||
# Basic dependency analysis (always done)
|
|
||||||
analysis_tasks.append(self._analyze_dependency_health(parsed_requirements["dependencies"]))
|
|
||||||
|
|
||||||
# Optional analyses
|
|
||||||
if check_updates:
|
|
||||||
analysis_tasks.append(self._check_package_updates(parsed_requirements["dependencies"]))
|
|
||||||
else:
|
|
||||||
analysis_tasks.append(asyncio.create_task(self._empty_updates_result()))
|
|
||||||
|
|
||||||
if security_scan:
|
|
||||||
analysis_tasks.append(self._scan_dependencies_security(parsed_requirements["dependencies"]))
|
|
||||||
else:
|
|
||||||
analysis_tasks.append(asyncio.create_task(self._empty_security_result()))
|
|
||||||
|
|
||||||
if compatibility_check:
|
|
||||||
python_version = parsed_requirements.get("python_version")
|
|
||||||
analysis_tasks.append(self._check_dependencies_compatibility(parsed_requirements["dependencies"], python_version))
|
|
||||||
else:
|
|
||||||
analysis_tasks.append(asyncio.create_task(self._empty_compatibility_result()))
|
|
||||||
|
|
||||||
# Execute analyses
|
|
||||||
results = await asyncio.gather(*analysis_tasks, return_exceptions=True)
|
|
||||||
|
|
||||||
# Unpack results
|
|
||||||
health_analysis = results[0] if not isinstance(results[0], Exception) else {"healthy": [], "issues": []}
|
|
||||||
update_analysis = results[1] if not isinstance(results[1], Exception) else {"outdated": [], "current": []}
|
|
||||||
security_analysis = results[2] if not isinstance(results[2], Exception) else {"vulnerabilities": [], "secure": []}
|
|
||||||
compatibility_analysis = results[3] if not isinstance(results[3], Exception) else {"compatible": [], "incompatible": []}
|
|
||||||
|
|
||||||
# Generate comprehensive analysis
|
|
||||||
analysis_summary = self._generate_analysis_summary(
|
|
||||||
parsed_requirements["dependencies"],
|
|
||||||
health_analysis,
|
|
||||||
update_analysis,
|
|
||||||
security_analysis,
|
|
||||||
compatibility_analysis
|
|
||||||
)
|
|
||||||
|
|
||||||
recommendations = self._generate_requirements_recommendations(
|
|
||||||
parsed_requirements,
|
|
||||||
health_analysis,
|
|
||||||
update_analysis,
|
|
||||||
security_analysis,
|
|
||||||
compatibility_analysis,
|
|
||||||
analysis_summary
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"file_path": file_path,
|
|
||||||
"analysis_timestamp": datetime.now(timezone.utc).isoformat(),
|
|
||||||
"file_info": parsed_requirements["file_info"],
|
|
||||||
"dependencies": parsed_requirements["dependencies"],
|
|
||||||
"dependency_analysis": {
|
|
||||||
"health": health_analysis,
|
|
||||||
"updates": update_analysis if check_updates else None,
|
|
||||||
"security": security_analysis if security_scan else None,
|
|
||||||
"compatibility": compatibility_analysis if compatibility_check else None,
|
|
||||||
},
|
|
||||||
"analysis_summary": analysis_summary,
|
|
||||||
"recommendations": recommendations,
|
|
||||||
"python_requirements": parsed_requirements.get("python_version"),
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Requirements analysis failed for {file_path}: {e}")
|
|
||||||
raise SearchError(f"Requirements analysis failed: {e}") from e
|
|
||||||
|
|
||||||
async def _parse_requirements_file(self, file_path: str) -> Dict[str, Any]:
|
|
||||||
"""Parse requirements from various file formats."""
|
|
||||||
path = Path(file_path)
|
|
||||||
|
|
||||||
if not path.exists():
|
|
||||||
raise FileNotFoundError(f"Requirements file not found: {file_path}")
|
|
||||||
|
|
||||||
file_info = {
|
|
||||||
"name": path.name,
|
|
||||||
"format": self._detect_file_format(path.name),
|
|
||||||
"size_bytes": path.stat().st_size,
|
|
||||||
"modified_time": datetime.fromtimestamp(path.stat().st_mtime, timezone.utc).isoformat(),
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse based on file format
|
|
||||||
if path.name.endswith('.txt'):
|
|
||||||
dependencies, python_version = await self._parse_requirements_txt(path)
|
|
||||||
elif path.name == 'pyproject.toml':
|
|
||||||
dependencies, python_version = await self._parse_pyproject_toml(path)
|
|
||||||
elif path.name == 'setup.py':
|
|
||||||
dependencies, python_version = await self._parse_setup_py(path)
|
|
||||||
elif path.name == 'Pipfile':
|
|
||||||
dependencies, python_version = await self._parse_pipfile(path)
|
|
||||||
elif path.name.endswith('.yml') or path.name.endswith('.yaml'):
|
|
||||||
dependencies, python_version = await self._parse_conda_yml(path)
|
|
||||||
else:
|
|
||||||
# Try to parse as requirements.txt format
|
|
||||||
dependencies, python_version = await self._parse_requirements_txt(path)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"file_info": file_info,
|
|
||||||
"dependencies": dependencies,
|
|
||||||
"python_version": python_version,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _detect_file_format(self, filename: str) -> str:
|
|
||||||
"""Detect requirements file format."""
|
|
||||||
filename_lower = filename.lower()
|
|
||||||
|
|
||||||
for fmt, pattern in self.requirement_patterns.items():
|
|
||||||
if re.match(pattern, filename_lower):
|
|
||||||
return fmt
|
|
||||||
|
|
||||||
return "unknown"
|
|
||||||
|
|
||||||
async def _parse_requirements_txt(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
|
|
||||||
"""Parse requirements.txt format files."""
|
|
||||||
dependencies = []
|
|
||||||
python_version = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
content = path.read_text(encoding="utf-8")
|
|
||||||
lines = content.splitlines()
|
|
||||||
|
|
||||||
for line_num, line in enumerate(lines, 1):
|
|
||||||
line = line.strip()
|
|
||||||
|
|
||||||
# Skip comments and empty lines
|
|
||||||
if not line or line.startswith('#'):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Skip -r and -e directives (for now)
|
|
||||||
if line.startswith(('-r', '-e', '--')):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Parse requirement line
|
|
||||||
dep = self._parse_requirement_line(line, line_num)
|
|
||||||
if dep:
|
|
||||||
dependencies.append(dep)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse requirements.txt {path}: {e}")
|
|
||||||
|
|
||||||
return dependencies, python_version
|
|
||||||
|
|
||||||
async def _parse_pyproject_toml(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
|
|
||||||
"""Parse pyproject.toml files."""
|
|
||||||
dependencies = []
|
|
||||||
python_version = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
content = path.read_text(encoding="utf-8")
|
|
||||||
data = tomllib.loads(content)
|
|
||||||
|
|
||||||
# Extract Python version requirement
|
|
||||||
build_system = data.get("build-system", {})
|
|
||||||
project = data.get("project", {})
|
|
||||||
tool_poetry = data.get("tool", {}).get("poetry", {})
|
|
||||||
|
|
||||||
# Check for Python version in different places
|
|
||||||
if project.get("requires-python"):
|
|
||||||
python_version = project["requires-python"]
|
|
||||||
elif tool_poetry.get("dependencies", {}).get("python"):
|
|
||||||
python_version = tool_poetry["dependencies"]["python"]
|
|
||||||
|
|
||||||
# Extract dependencies from project.dependencies
|
|
||||||
if "dependencies" in project:
|
|
||||||
for dep_line in project["dependencies"]:
|
|
||||||
dep = self._parse_requirement_line(dep_line, 0)
|
|
||||||
if dep:
|
|
||||||
dependencies.append(dep)
|
|
||||||
|
|
||||||
# Extract from tool.poetry.dependencies
|
|
||||||
if "tool" in data and "poetry" in data["tool"] and "dependencies" in data["tool"]["poetry"]:
|
|
||||||
poetry_deps = data["tool"]["poetry"]["dependencies"]
|
|
||||||
for name, version_spec in poetry_deps.items():
|
|
||||||
if name.lower() == "python":
|
|
||||||
continue # Skip Python version
|
|
||||||
|
|
||||||
if isinstance(version_spec, str):
|
|
||||||
req_line = f"{name}{version_spec}" if version_spec.startswith(('=', '<', '>', '~', '^', '!')) else f"{name}=={version_spec}"
|
|
||||||
else:
|
|
||||||
# Handle complex version specifications
|
|
||||||
req_line = f"{name}>={version_spec.get('version', '0.0.0')}"
|
|
||||||
|
|
||||||
dep = self._parse_requirement_line(req_line, 0)
|
|
||||||
if dep:
|
|
||||||
dependencies.append(dep)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse pyproject.toml {path}: {e}")
|
|
||||||
|
|
||||||
return dependencies, python_version
|
|
||||||
|
|
||||||
async def _parse_setup_py(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
|
|
||||||
"""Parse setup.py files (basic extraction)."""
|
|
||||||
dependencies = []
|
|
||||||
python_version = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
content = path.read_text(encoding="utf-8")
|
|
||||||
|
|
||||||
# Look for install_requires
|
|
||||||
install_requires_match = re.search(r"install_requires\s*=\s*\[(.*?)\]", content, re.DOTALL)
|
|
||||||
if install_requires_match:
|
|
||||||
deps_text = install_requires_match.group(1)
|
|
||||||
# Extract quoted strings
|
|
||||||
quoted_deps = re.findall(r'["\']([^"\']+)["\']', deps_text)
|
|
||||||
|
|
||||||
for dep_line in quoted_deps:
|
|
||||||
dep = self._parse_requirement_line(dep_line, 0)
|
|
||||||
if dep:
|
|
||||||
dependencies.append(dep)
|
|
||||||
|
|
||||||
# Look for python_requires
|
|
||||||
python_requires_match = re.search(r"python_requires\s*=\s*[\"']([^\"']+)[\"']", content)
|
|
||||||
if python_requires_match:
|
|
||||||
python_version = python_requires_match.group(1)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse setup.py {path}: {e}")
|
|
||||||
|
|
||||||
return dependencies, python_version
|
|
||||||
|
|
||||||
async def _parse_pipfile(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
|
|
||||||
"""Parse Pipfile format."""
|
|
||||||
dependencies = []
|
|
||||||
python_version = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
content = path.read_text(encoding="utf-8")
|
|
||||||
data = tomllib.loads(content)
|
|
||||||
|
|
||||||
# Extract Python version
|
|
||||||
if "requires" in data and "python_version" in data["requires"]:
|
|
||||||
python_version = f">={data['requires']['python_version']}"
|
|
||||||
|
|
||||||
# Extract packages
|
|
||||||
for section in ["packages", "dev-packages"]:
|
|
||||||
if section in data:
|
|
||||||
for name, version_spec in data[section].items():
|
|
||||||
if isinstance(version_spec, str):
|
|
||||||
req_line = f"{name}{version_spec}" if version_spec.startswith(('=', '<', '>', '~', '^', '!')) else f"{name}=={version_spec}"
|
|
||||||
else:
|
|
||||||
req_line = f"{name}>={version_spec.get('version', '0.0.0')}"
|
|
||||||
|
|
||||||
dep = self._parse_requirement_line(req_line, 0)
|
|
||||||
if dep:
|
|
||||||
dep["dev_dependency"] = (section == "dev-packages")
|
|
||||||
dependencies.append(dep)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse Pipfile {path}: {e}")
|
|
||||||
|
|
||||||
return dependencies, python_version
|
|
||||||
|
|
||||||
async def _parse_conda_yml(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
|
|
||||||
"""Parse conda environment.yml files."""
|
|
||||||
dependencies = []
|
|
||||||
python_version = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
content = path.read_text(encoding="utf-8")
|
|
||||||
data = yaml.safe_load(content)
|
|
||||||
|
|
||||||
if "dependencies" in data:
|
|
||||||
for dep in data["dependencies"]:
|
|
||||||
if isinstance(dep, str):
|
|
||||||
if dep.startswith("python"):
|
|
||||||
# Extract Python version
|
|
||||||
python_match = re.search(r"python\s*([><=~!]+)\s*([0-9.]+)", dep)
|
|
||||||
if python_match:
|
|
||||||
python_version = f"{python_match.group(1)}{python_match.group(2)}"
|
|
||||||
else:
|
|
||||||
parsed_dep = self._parse_requirement_line(dep, 0)
|
|
||||||
if parsed_dep:
|
|
||||||
dependencies.append(parsed_dep)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse conda.yml {path}: {e}")
|
|
||||||
|
|
||||||
return dependencies, python_version
|
|
||||||
|
|
||||||
def _parse_requirement_line(self, line: str, line_number: int) -> Optional[Dict[str, Any]]:
|
|
||||||
"""Parse a single requirement line."""
|
|
||||||
try:
|
|
||||||
# Remove inline comments
|
|
||||||
if '#' in line:
|
|
||||||
line = line[:line.index('#')].strip()
|
|
||||||
|
|
||||||
if not line:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Handle extras (package[extra1,extra2])
|
|
||||||
extras = []
|
|
||||||
extras_match = re.search(r'\[([^\]]+)\]', line)
|
|
||||||
if extras_match:
|
|
||||||
extras = [e.strip() for e in extras_match.group(1).split(',')]
|
|
||||||
line = re.sub(r'\[([^\]]+)\]', '', line)
|
|
||||||
|
|
||||||
# Parse package name and version specifiers
|
|
||||||
# Split on version operators
|
|
||||||
version_ops = ['>=', '<=', '==', '!=', '~=', '>', '<']
|
|
||||||
package_name = line
|
|
||||||
version_specifiers = []
|
|
||||||
|
|
||||||
for op in version_ops:
|
|
||||||
if op in line:
|
|
||||||
parts = line.split(op)
|
|
||||||
package_name = parts[0].strip()
|
|
||||||
if len(parts) > 1:
|
|
||||||
version_specifiers.append({
|
|
||||||
"operator": op,
|
|
||||||
"version": parts[1].strip().split(',')[0].strip()
|
|
||||||
})
|
|
||||||
break
|
|
||||||
|
|
||||||
# Handle comma-separated version specs
|
|
||||||
if ',' in line and version_specifiers:
|
|
||||||
remaining = line.split(version_specifiers[0]["operator"], 1)[1]
|
|
||||||
for spec in remaining.split(',')[1:]:
|
|
||||||
spec = spec.strip()
|
|
||||||
for op in version_ops:
|
|
||||||
if spec.startswith(op):
|
|
||||||
version_specifiers.append({
|
|
||||||
"operator": op,
|
|
||||||
"version": spec[len(op):].strip()
|
|
||||||
})
|
|
||||||
break
|
|
||||||
|
|
||||||
# Clean package name
|
|
||||||
package_name = re.sub(r'[<>=!~,\s].*', '', package_name).strip()
|
|
||||||
|
|
||||||
if not package_name:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return {
|
|
||||||
"name": package_name,
|
|
||||||
"version_specifiers": version_specifiers,
|
|
||||||
"extras": extras,
|
|
||||||
"line_number": line_number,
|
|
||||||
"raw_line": line.strip(),
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Failed to parse requirement line '{line}': {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
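
    # Parsing sketch (illustrative only): the dict _parse_requirement_line is
    # expected to produce for a typical requirement string.
    #
    #   RequirementsAnalyzer()._parse_requirement_line("requests[socks]>=2.28,<3.0", 1)
    #   -> {
    #        "name": "requests",
    #        "version_specifiers": [
    #            {"operator": ">=", "version": "2.28"},
    #            {"operator": "<", "version": "3.0"},
    #        ],
    #        "extras": ["socks"],
    #        "line_number": 1,
    #        "raw_line": "requests>=2.28,<3.0",
    #      }
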
    async def _analyze_dependency_health(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze overall health of dependencies."""
        healthy = []
        issues = []

        for dep in dependencies:
            name = dep["name"]
            version_specs = dep["version_specifiers"]

            # Check for problematic version specifications
            health_issues = []

            if not version_specs:
                health_issues.append("No version constraint (could lead to instability)")
            else:
                # Check for overly restrictive versions
                exact_versions = [spec for spec in version_specs if spec["operator"] == "=="]
                if exact_versions:
                    health_issues.append("Exact version pinning (may cause conflicts)")

                # Check for very loose constraints
                loose_constraints = [spec for spec in version_specs if spec["operator"] in [">", ">="]]
                if loose_constraints and not any(spec["operator"] in ["<", "<="] for spec in version_specs):
                    health_issues.append("No upper bound (may break with future versions)")

            if health_issues:
                issues.append({
                    "package": name,
                    "issues": health_issues,
                    "current_spec": version_specs
                })
            else:
                healthy.append({
                    "package": name,
                    "version_spec": version_specs
                })

        return {
            "healthy": healthy,
            "issues": issues,
            "health_score": len(healthy) / len(dependencies) * 100 if dependencies else 0
        }

    async def _check_package_updates(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Check for available package updates."""
        outdated = []
        current = []

        async with PyPIClient() as client:
            # Process in batches to avoid overwhelming PyPI
            batch_size = 10
            for i in range(0, len(dependencies), batch_size):
                batch = dependencies[i:i + batch_size]
                batch_tasks = []

                for dep in batch:
                    task = self._check_single_package_update(client, dep)
                    batch_tasks.append(task)

                batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

                for dep, result in zip(batch, batch_results):
                    if isinstance(result, Exception):
                        logger.debug(f"Failed to check updates for {dep['name']}: {result}")
                        continue

                    if result["has_update"]:
                        outdated.append(result)
                    else:
                        current.append(result)

        return {
            "outdated": outdated,
            "current": current,
            "update_percentage": len(outdated) / len(dependencies) * 100 if dependencies else 0
        }

    async def _check_single_package_update(self, client: PyPIClient, dep: Dict[str, Any]) -> Dict[str, Any]:
        """Check if a single package has updates available."""
        try:
            package_data = await client.get_package_info(dep["name"])
            latest_version = package_data["info"]["version"]

            # Compare the latest release against an exact pin when one exists;
            # without a pin there is no reliable baseline, so report no update
            # rather than flagging every package.
            has_update = False
            exact_pins = [s for s in dep["version_specifiers"] if s["operator"] == "=="]
            if exact_pins:
                try:
                    from packaging.version import Version
                    has_update = Version(latest_version) > Version(exact_pins[0]["version"])
                except Exception:
                    has_update = False

            return {
                "package": dep["name"],
                "current_spec": dep["version_specifiers"],
                "latest_version": latest_version,
                "has_update": has_update,
                "update_recommendation": f"Update to {latest_version}" if has_update else "Up to date"
            }

        except Exception as e:
            return {
                "package": dep["name"],
                "current_spec": dep["version_specifiers"],
                "latest_version": "unknown",
                "has_update": False,
                "error": str(e)
            }

    async def _scan_dependencies_security(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Scan dependencies for security vulnerabilities."""
        # Import security scanner if available
        try:
            from .security import scan_package_security

            vulnerabilities = []
            secure = []

            # Process in small batches
            batch_size = 5
            for i in range(0, len(dependencies), batch_size):
                batch = dependencies[i:i + batch_size]
                batch_tasks = []

                for dep in batch:
                    task = self._scan_single_dependency_security(dep)
                    batch_tasks.append(task)

                batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

                for dep, result in zip(batch, batch_results):
                    if isinstance(result, Exception):
                        logger.debug(f"Failed to scan security for {dep['name']}: {result}")
                        continue

                    if result["vulnerabilities"]:
                        vulnerabilities.append(result)
                    else:
                        secure.append(result)

            return {
                "vulnerabilities": vulnerabilities,
                "secure": secure,
                "vulnerability_count": sum(len(v["vulnerabilities"]) for v in vulnerabilities),
            }

        except ImportError:
            logger.warning("Security scanner not available")
            return await self._empty_security_result()

    async def _scan_single_dependency_security(self, dep: Dict[str, Any]) -> Dict[str, Any]:
        """Scan a single dependency for security issues."""
        try:
            from .security import scan_package_security

            result = await scan_package_security(
                dep["name"],
                version=None,  # Latest version
                include_dependencies=False
            )

            vuln_summary = result.get("security_summary", {})
            return {
                "package": dep["name"],
                "vulnerabilities": result.get("vulnerabilities", {}).get("direct", []),
                "risk_level": vuln_summary.get("risk_level", "minimal"),
                "total_vulnerabilities": vuln_summary.get("total_vulnerabilities", 0)
            }

        except Exception as e:
            return {
                "package": dep["name"],
                "vulnerabilities": [],
                "risk_level": "unknown",
                "error": str(e)
            }

    async def _check_dependencies_compatibility(
        self, dependencies: List[Dict[str, Any]], python_version: Optional[str]
    ) -> Dict[str, Any]:
        """Check Python version compatibility for dependencies."""
        if not python_version:
            return await self._empty_compatibility_result()

        compatible = []
        incompatible = []

        # Process in batches
        batch_size = 10
        for i in range(0, len(dependencies), batch_size):
            batch = dependencies[i:i + batch_size]
            batch_tasks = []

            for dep in batch:
                task = self._check_single_dependency_compatibility(dep, python_version)
                batch_tasks.append(task)

            batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

            for dep, result in zip(batch, batch_results):
                if isinstance(result, Exception):
                    logger.debug(f"Failed to check compatibility for {dep['name']}: {result}")
                    continue

                if result["compatible"]:
                    compatible.append(result)
                else:
                    incompatible.append(result)

        return {
            "compatible": compatible,
            "incompatible": incompatible,
            "python_version": python_version,
            "compatibility_percentage": len(compatible) / len(dependencies) * 100 if dependencies else 0
        }

    async def _check_single_dependency_compatibility(
        self, dep: Dict[str, Any], python_version: str
    ) -> Dict[str, Any]:
        """Check compatibility for a single dependency."""
        try:
            from .compatibility_check import check_python_compatibility

            # Extract target Python version (simplified)
            target_version = "3.9"  # Default fallback
            version_match = re.search(r'(\d+\.\d+)', python_version)
            if version_match:
                target_version = version_match.group(1)

            result = await check_python_compatibility(dep["name"], target_version)

            return {
                "package": dep["name"],
                "compatible": result.get("compatible", False),
                "python_version": target_version,
                "details": result.get("compatibility_info", "")
            }

        except Exception as e:
            return {
                "package": dep["name"],
                "compatible": True,  # Assume compatible on error
                "python_version": python_version,
                "error": str(e)
            }

    # Helper methods for empty results
    async def _empty_updates_result(self) -> Dict[str, Any]:
        return {"outdated": [], "current": [], "update_percentage": 0}

    async def _empty_security_result(self) -> Dict[str, Any]:
        return {"vulnerabilities": [], "secure": [], "vulnerability_count": 0}

    async def _empty_compatibility_result(self) -> Dict[str, Any]:
        return {"compatible": [], "incompatible": [], "python_version": None, "compatibility_percentage": 100}

    def _generate_analysis_summary(
        self,
        dependencies: List[Dict[str, Any]],
        health_analysis: Dict[str, Any],
        update_analysis: Dict[str, Any],
        security_analysis: Dict[str, Any],
        compatibility_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate comprehensive analysis summary."""
        return {
            "total_dependencies": len(dependencies),
            "health_score": round(health_analysis.get("health_score", 0), 1),
            "packages_with_issues": len(health_analysis.get("issues", [])),
            "outdated_packages": len(update_analysis.get("outdated", [])),
            "security_vulnerabilities": security_analysis.get("vulnerability_count", 0),
            "compatibility_issues": len(compatibility_analysis.get("incompatible", [])),
            "overall_risk_level": self._calculate_overall_risk_level(
                health_analysis, update_analysis, security_analysis, compatibility_analysis
            )
        }

    def _calculate_overall_risk_level(
        self, health: Dict[str, Any], updates: Dict[str, Any],
        security: Dict[str, Any], compatibility: Dict[str, Any]
    ) -> str:
        """Calculate overall risk level for the project."""
        risk_score = 0

        # Health risks
        health_score = health.get("health_score", 100)
        if health_score < 50:
            risk_score += 30
        elif health_score < 75:
            risk_score += 15

        # Security risks
        vuln_count = security.get("vulnerability_count", 0)
        if vuln_count > 10:
            risk_score += 40
        elif vuln_count > 5:
            risk_score += 25
        elif vuln_count > 0:
            risk_score += 15

        # Compatibility risks
        incompat_count = len(compatibility.get("incompatible", []))
        if incompat_count > 5:
            risk_score += 25
        elif incompat_count > 0:
            risk_score += 10

        # Update risks (outdated packages)
        outdated_count = len(updates.get("outdated", []))
        total_deps = len(updates.get("outdated", [])) + len(updates.get("current", []))
        if total_deps > 0:
            outdated_percentage = (outdated_count / total_deps) * 100
            if outdated_percentage > 50:
                risk_score += 20
            elif outdated_percentage > 25:
                risk_score += 10

        # Calculate risk level
        if risk_score >= 70:
            return "critical"
        elif risk_score >= 50:
            return "high"
        elif risk_score >= 30:
            return "medium"
        elif risk_score > 0:
            return "low"
        else:
            return "minimal"
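
    # Worked example (illustrative only) of the scoring above: health_score 60
    # adds 15, three vulnerabilities add 15, one incompatible package adds 10,
    # and 6 of 20 packages outdated (30%) adds 10, for a total of 50, which
    # maps to "high".
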
    def _generate_requirements_recommendations(
        self,
        parsed_requirements: Dict[str, Any],
        health_analysis: Dict[str, Any],
        update_analysis: Dict[str, Any],
        security_analysis: Dict[str, Any],
        compatibility_analysis: Dict[str, Any],
        summary: Dict[str, Any]
    ) -> List[str]:
        """Generate actionable recommendations for requirements management."""
        recommendations = []

        risk_level = summary.get("overall_risk_level", "minimal")

        # Overall assessment
        if risk_level == "critical":
            recommendations.append("🚨 Critical issues detected - immediate action required")
        elif risk_level == "high":
            recommendations.append("⚠️ High risk dependencies - review and update urgently")
        elif risk_level == "medium":
            recommendations.append("⚠️ Moderate risk - address issues when possible")
        elif risk_level == "low":
            recommendations.append("ℹ️ Low risk - minor issues worth addressing")
        elif risk_level == "minimal":
            recommendations.append("✅ Requirements appear healthy")

        # Specific recommendations
        health_issues = health_analysis.get("issues", [])
        if health_issues:
            recommendations.append(f"🔧 Fix {len(health_issues)} dependency specification issues")

        outdated_count = len(update_analysis.get("outdated", []))
        if outdated_count > 0:
            recommendations.append(f"📦 Update {outdated_count} outdated packages")

        vuln_count = security_analysis.get("vulnerability_count", 0)
        if vuln_count > 0:
            recommendations.append(f"🔒 Address {vuln_count} security vulnerabilities")

        incompat_count = len(compatibility_analysis.get("incompatible", []))
        if incompat_count > 0:
            recommendations.append(f"🐍 Fix {incompat_count} Python compatibility issues")

        # File format recommendations
        file_format = parsed_requirements["file_info"]["format"]
        if file_format == "requirements.txt":
            recommendations.append("💡 Consider migrating to pyproject.toml for better dependency management")
        elif file_format == "unknown":
            recommendations.append("📝 Use standard requirements file formats (requirements.txt, pyproject.toml)")

        return recommendations


# Main analysis functions
async def analyze_project_requirements(
    file_path: str,
    check_updates: bool = True,
    security_scan: bool = True,
    compatibility_check: bool = True
) -> Dict[str, Any]:
    """
    Analyze project requirements file for dependencies, security, and compatibility.

    Args:
        file_path: Path to the requirements file
        check_updates: Whether to check for package updates
        security_scan: Whether to perform security vulnerability scanning
        compatibility_check: Whether to check Python version compatibility

    Returns:
        Comprehensive requirements file analysis
    """
    analyzer = RequirementsAnalyzer()
    return await analyzer.analyze_requirements_file(
        file_path, check_updates, security_scan, compatibility_check
    )
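

# Usage sketch (illustrative only): running the full analysis against a local
# requirements file; the path is an arbitrary example.
#
#   import asyncio
#
#   result = asyncio.run(analyze_project_requirements("requirements.txt"))
#   print(result["analysis_summary"]["overall_risk_level"])
#   for tip in result["recommendations"]:
#       print(tip)

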
async def compare_requirements_files(
    file_paths: List[str]
) -> Dict[str, Any]:
    """
    Compare multiple requirements files to identify differences and conflicts.

    Args:
        file_paths: List of paths to requirements files to compare

    Returns:
        Comparative analysis of requirements files
    """
    logger.info(f"Starting requirements comparison for {len(file_paths)} files")

    analyzer = RequirementsAnalyzer()
    file_analyses = {}

    # Analyze each file
    for file_path in file_paths:
        try:
            analysis = await analyzer.analyze_requirements_file(
                file_path, check_updates=False, security_scan=False, compatibility_check=False
            )
            file_analyses[file_path] = analysis
        except Exception as e:
            logger.error(f"Failed to analyze {file_path}: {e}")
            file_analyses[file_path] = {"error": str(e), "dependencies": []}

    # Compare dependencies
    all_packages = set()
    for analysis in file_analyses.values():
        if "dependencies" in analysis:
            for dep in analysis["dependencies"]:
                all_packages.add(dep["name"])

    # Generate comparison results
    conflicts = []
    common_packages = []
    unique_packages = {}

    for package in all_packages:
        versions_by_file = {}
        for file_path, analysis in file_analyses.items():
            if "dependencies" in analysis:
                for dep in analysis["dependencies"]:
                    if dep["name"] == package:
                        versions_by_file[file_path] = dep["version_specifiers"]
                        break

        if len(versions_by_file) == len(file_paths):
            # Package is in all files
            version_specs = list(versions_by_file.values())
            if len(set(str(spec) for spec in version_specs)) > 1:
                conflicts.append({
                    "package": package,
                    "versions_by_file": versions_by_file
                })
            else:
                common_packages.append(package)
        else:
            # Package is unique to some files
            for file_path, versions in versions_by_file.items():
                if file_path not in unique_packages:
                    unique_packages[file_path] = []
                unique_packages[file_path].append({
                    "package": package,
                    "version_specifiers": versions
                })

    return {
        "comparison_timestamp": datetime.now(timezone.utc).isoformat(),
        "files_compared": len(file_paths),
        "file_analyses": file_analyses,
        "comparison_results": {
            "total_unique_packages": len(all_packages),
            "common_packages": common_packages,
            "conflicting_packages": conflicts,
            "unique_to_files": unique_packages,
        },
        "recommendations": _generate_comparison_recommendations(conflicts, unique_packages, file_analyses)
    }


def _generate_comparison_recommendations(
    conflicts: List[Dict[str, Any]],
    unique_packages: Dict[str, List[Dict[str, Any]]],
    file_analyses: Dict[str, Any]
) -> List[str]:
    """Generate recommendations for requirements file comparison."""
    recommendations = []

    if conflicts:
        recommendations.append(f"🔄 Resolve {len(conflicts)} version conflicts across files")
        for conflict in conflicts[:3]:  # Show first 3
            recommendations.append(f" - {conflict['package']}: inconsistent versions")

    if unique_packages:
        total_unique = sum(len(packages) for packages in unique_packages.values())
        recommendations.append(f"📦 {total_unique} packages are unique to specific files")

    if not conflicts and not unique_packages:
        recommendations.append("✅ All requirements files are consistent")

    # File format recommendations
    formats = set()
    for analysis in file_analyses.values():
        if "file_info" in analysis:
            formats.add(analysis["file_info"]["format"])

    if len(formats) > 1:
        recommendations.append("📝 Consider standardizing on a single requirements file format")

    return recommendations
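

# Usage sketch (illustrative only): comparing a base and a dev requirements
# file; the file names are arbitrary examples.
#
#   import asyncio
#
#   report = asyncio.run(compare_requirements_files(
#       ["requirements.txt", "requirements-dev.txt"]
#   ))
#   results = report["comparison_results"]
#   print(results["conflicting_packages"])  # same package, different pins
#   print(results["unique_to_files"])       # packages present in only some files
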
@ -1,143 +0,0 @@
"""Requirements file analysis tools for Python projects."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, List
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..tools.requirements_analyzer import analyze_project_requirements, compare_requirements_files
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def analyze_requirements_file_tool(
|
|
||||||
file_path: str,
|
|
||||||
check_updates: bool = True,
|
|
||||||
security_scan: bool = True,
|
|
||||||
compatibility_check: bool = True
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Analyze project requirements file for dependencies, security, and compatibility.
|
|
||||||
|
|
||||||
This tool provides comprehensive analysis of Python project requirements files
|
|
||||||
including dependency parsing, version checking, security vulnerability scanning,
|
|
||||||
Python compatibility assessment, and actionable recommendations for improvements.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path: Path to the requirements file (requirements.txt, pyproject.toml, setup.py, etc.)
|
|
||||||
check_updates: Whether to check for available package updates
|
|
||||||
security_scan: Whether to perform security vulnerability scanning on dependencies
|
|
||||||
compatibility_check: Whether to check Python version compatibility for all dependencies
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comprehensive requirements analysis including:
|
|
||||||
- File information and detected format (requirements.txt, pyproject.toml, etc.)
|
|
||||||
- Parsed dependencies with version specifiers and extras
|
|
||||||
- Dependency health analysis with specification issues and recommendations
|
|
||||||
- Package update analysis showing outdated packages and latest versions
|
|
||||||
- Security vulnerability scan results for all dependencies
|
|
||||||
- Python version compatibility assessment
|
|
||||||
- Overall risk level and actionable improvement recommendations
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
FileNotFoundError: If the requirements file is not found
|
|
||||||
NetworkError: For network-related errors during analysis
|
|
||||||
SearchError: If requirements analysis fails
|
|
||||||
"""
|
|
||||||
logger.info(f"MCP tool: Analyzing requirements file {file_path}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await analyze_project_requirements(
|
|
||||||
file_path=file_path,
|
|
||||||
check_updates=check_updates,
|
|
||||||
security_scan=security_scan,
|
|
||||||
compatibility_check=compatibility_check
|
|
||||||
)
|
|
||||||
|
|
||||||
summary = result.get("analysis_summary", {})
|
|
||||||
total_deps = summary.get("total_dependencies", 0)
|
|
||||||
risk_level = summary.get("overall_risk_level", "unknown")
|
|
||||||
logger.info(f"MCP tool: Requirements analysis completed for {file_path} - {total_deps} dependencies, risk level: {risk_level}")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except (FileNotFoundError, NetworkError, SearchError) as e:
|
|
||||||
logger.error(f"Error analyzing requirements file {file_path}: {e}")
|
|
||||||
return {
|
|
||||||
"error": f"Requirements analysis failed: {e}",
|
|
||||||
"error_type": type(e).__name__,
|
|
||||||
"file_path": file_path,
|
|
||||||
"analysis_timestamp": "",
|
|
||||||
"file_info": {"name": file_path, "format": "unknown"},
|
|
||||||
"dependencies": [],
|
|
||||||
"dependency_analysis": {},
|
|
||||||
"analysis_summary": {
|
|
||||||
"total_dependencies": 0,
|
|
||||||
"health_score": 0,
|
|
||||||
"packages_with_issues": 0,
|
|
||||||
"outdated_packages": 0,
|
|
||||||
"security_vulnerabilities": 0,
|
|
||||||
"compatibility_issues": 0,
|
|
||||||
"overall_risk_level": "critical",
|
|
||||||
},
|
|
||||||
"recommendations": [f"❌ Requirements analysis failed: {e}"],
|
|
||||||
"python_requirements": None,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
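

# Result-handling sketch (illustrative only): the tool above returns an
# error-shaped dict for the listed failure types instead of raising, so
# callers can branch on the "error" key. handle_summary() is a hypothetical
# placeholder for application code.
#
#   result = await analyze_requirements_file_tool("requirements.txt")
#   if "error" in result:
#       logger.warning("analysis failed: %s", result["error"])
#   else:
#       handle_summary(result["analysis_summary"])

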
async def compare_multiple_requirements_files(
    file_paths: List[str]
) -> Dict[str, Any]:
    """
    Compare multiple requirements files to identify differences and conflicts.

    This tool analyzes multiple requirements files simultaneously to identify
    version conflicts, unique dependencies, and inconsistencies across different
    project configurations or environments.

    Args:
        file_paths: List of paths to requirements files to compare and analyze

    Returns:
        Dictionary containing comparative requirements analysis including:
        - Detailed analysis results for each individual file
        - Common packages shared across all files
        - Conflicting package versions between files with specific version details
        - Packages unique to specific files
        - Recommendations for resolving conflicts and standardizing requirements
        - Statistics on package overlap and conflict rates

    Raises:
        ValueError: If file_paths list is empty
        NetworkError: For network-related errors during analysis
        SearchError: If requirements comparison fails
    """
    if not file_paths:
        raise ValueError("File paths list cannot be empty")

    logger.info(f"MCP tool: Comparing {len(file_paths)} requirements files")

    try:
        result = await compare_requirements_files(file_paths=file_paths)

        comparison_results = result.get("comparison_results", {})
        conflicts = len(comparison_results.get("conflicting_packages", []))
        total_packages = comparison_results.get("total_unique_packages", 0)

        logger.info(
            f"MCP tool: Requirements comparison completed - "
            f"{total_packages} unique packages, {conflicts} conflicts found"
        )
        return result

    except (ValueError, NetworkError, SearchError) as e:
        logger.error(f"Error comparing requirements files: {e}")
        return {
            "error": f"Requirements comparison failed: {e}",
            "error_type": type(e).__name__,
            "comparison_timestamp": "",
            "files_compared": len(file_paths),
            "file_analyses": {},
            "comparison_results": {
                "total_unique_packages": 0,
                "common_packages": [],
                "conflicting_packages": [],
                "unique_to_files": {},
            },
            "recommendations": [f"❌ Requirements comparison failed: {e}"]
        }
@ -1,660 +0,0 @@
"""Security vulnerability scanning and analysis tools for PyPI packages."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from ..core.exceptions import NetworkError, SearchError
|
|
||||||
from ..core.pypi_client import PyPIClient
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class VulnerabilityScanner:
|
|
||||||
"""Comprehensive vulnerability scanner for PyPI packages."""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.timeout = 30.0
|
|
||||||
self.session = None
|
|
||||||
|
|
||||||
# Vulnerability database endpoints
|
|
||||||
self.osv_api = "https://api.osv.dev/v1/query"
|
|
||||||
self.safety_db_api = "https://pyup.io/api/v1/safety"
|
|
||||||
self.snyk_api = "https://snyk.io/test/pip"
|
|
||||||
|
|
||||||
# Common vulnerability patterns to look for
|
|
||||||
self.high_risk_patterns = [
|
|
||||||
"remote code execution", "rce", "code injection", "sql injection",
|
|
||||||
"cross-site scripting", "xss", "csrf", "authentication bypass",
|
|
||||||
"privilege escalation", "arbitrary file", "path traversal",
|
|
||||||
"buffer overflow", "memory corruption", "denial of service"
|
|
||||||
]
|
|
||||||
|
|
||||||
async def scan_package(
|
|
||||||
self,
|
|
||||||
package_name: str,
|
|
||||||
version: Optional[str] = None,
|
|
||||||
include_dependencies: bool = True,
|
|
||||||
severity_filter: Optional[str] = None
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Comprehensive security scan of a PyPI package.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_name: Name of the package to scan
|
|
||||||
version: Specific version to scan (optional, defaults to latest)
|
|
||||||
include_dependencies: Whether to scan dependencies too
|
|
||||||
severity_filter: Filter by severity level (low, medium, high, critical)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing security analysis results
|
|
||||||
"""
|
|
||||||
logger.info(f"Starting security scan for package: {package_name}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get package information
|
|
||||||
async with PyPIClient() as client:
|
|
||||||
package_data = await client.get_package_info(package_name, version)
|
|
||||||
|
|
||||||
package_version = version or package_data["info"]["version"]
|
|
||||||
|
|
||||||
# Run parallel vulnerability scans
|
|
||||||
scan_tasks = [
|
|
||||||
self._scan_osv_database(package_name, package_version),
|
|
||||||
self._scan_github_advisories(package_name, package_version),
|
|
||||||
self._analyze_package_metadata(package_data),
|
|
||||||
self._check_dependency_vulnerabilities(package_name, package_version) if include_dependencies else asyncio.create_task(self._empty_result())
|
|
||||||
]
|
|
||||||
|
|
||||||
osv_results, github_results, metadata_analysis, dependency_results = await asyncio.gather(
|
|
||||||
*scan_tasks, return_exceptions=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Consolidate results
|
|
||||||
vulnerabilities = []
|
|
||||||
|
|
||||||
# Process OSV results
|
|
||||||
if not isinstance(osv_results, Exception) and osv_results:
|
|
||||||
vulnerabilities.extend(osv_results.get("vulnerabilities", []))
|
|
||||||
|
|
||||||
# Process GitHub results
|
|
||||||
if not isinstance(github_results, Exception) and github_results:
|
|
||||||
vulnerabilities.extend(github_results.get("vulnerabilities", []))
|
|
||||||
|
|
||||||
# Process dependency vulnerabilities
|
|
||||||
if not isinstance(dependency_results, Exception) and dependency_results:
|
|
||||||
vulnerabilities.extend(dependency_results.get("vulnerabilities", []))
|
|
||||||
|
|
||||||
# Apply severity filter
|
|
||||||
if severity_filter:
|
|
||||||
vulnerabilities = [
|
|
||||||
vuln for vuln in vulnerabilities
|
|
||||||
if vuln.get("severity", "").lower() == severity_filter.lower()
|
|
||||||
]
|
|
||||||
|
|
||||||
# Generate security report
|
|
||||||
security_report = self._generate_security_report(
|
|
||||||
package_name, package_version, vulnerabilities, metadata_analysis
|
|
||||||
)
|
|
||||||
|
|
||||||
return security_report
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Security scan failed for {package_name}: {e}")
|
|
||||||
raise SearchError(f"Security scan failed: {e}") from e
|
|
||||||
|
|
||||||
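
    # Usage sketch (illustrative only): scanning a single package and keeping
    # only critical findings; the package name is an arbitrary example.
    #
    #   scanner = VulnerabilityScanner()
    #   report = await scanner.scan_package(
    #       "requests", include_dependencies=False, severity_filter="critical"
    #   )
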
    async def _scan_osv_database(self, package_name: str, version: str) -> Dict[str, Any]:
        """Scan package against OSV (Open Source Vulnerabilities) database."""
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                query_data = {
                    "package": {
                        "name": package_name,
                        "ecosystem": "PyPI"
                    },
                    "version": version
                }

                response = await client.post(
                    self.osv_api,
                    json=query_data,
                    headers={"Content-Type": "application/json"}
                )

                if response.status_code == 200:
                    data = response.json()
                    vulnerabilities = []

                    for vuln in data.get("vulns", []):
                        severity = self._extract_severity_from_osv(vuln)
                        vulnerabilities.append({
                            "id": vuln.get("id", ""),
                            "summary": vuln.get("summary", ""),
                            "details": vuln.get("details", ""),
                            "severity": severity,
                            "published": vuln.get("published", ""),
                            "modified": vuln.get("modified", ""),
                            "source": "OSV",
                            "references": [ref.get("url", "") for ref in vuln.get("references", [])],
                            "affected_versions": self._extract_affected_versions(vuln),
                            "fixed_versions": self._extract_fixed_versions(vuln),
                        })

                    return {"vulnerabilities": vulnerabilities, "source": "OSV"}
                else:
                    logger.warning(f"OSV API returned status {response.status_code}")

        except Exception as e:
            logger.warning(f"OSV database scan failed: {e}")

        return {"vulnerabilities": [], "source": "OSV"}
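
    # Request/response sketch (illustrative only): the OSV query posted above
    # and the response fields this method reads; the values are made up.
    #
    #   POST https://api.osv.dev/v1/query
    #   {"package": {"name": "jinja2", "ecosystem": "PyPI"}, "version": "2.4.1"}
    #
    #   -> {"vulns": [{"id": "GHSA-...", "summary": "...", "severity": [...],
    #                  "references": [{"url": "..."}]}]}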
|
|
||||||
async def _scan_github_advisories(self, package_name: str, version: str) -> Dict[str, Any]:
|
|
||||||
"""Scan against GitHub Security Advisories."""
|
|
||||||
try:
|
|
||||||
# GitHub GraphQL API for security advisories
|
|
||||||
query = """
|
|
||||||
query($ecosystem: SecurityAdvisoryEcosystem!, $package: String!) {
|
|
||||||
securityVulnerabilities(ecosystem: $ecosystem, package: $package, first: 100) {
|
|
||||||
nodes {
|
|
||||||
advisory {
|
|
||||||
ghsaId
|
|
||||||
summary
|
|
||||||
description
|
|
||||||
severity
|
|
||||||
publishedAt
|
|
||||||
updatedAt
|
|
||||||
references {
|
|
||||||
url
|
|
||||||
}
|
|
||||||
}
|
|
||||||
vulnerableVersionRange
|
|
||||||
firstPatchedVersion {
|
|
||||||
identifier
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
variables = {
|
|
||||||
"ecosystem": "PIP",
|
|
||||||
"package": package_name
|
|
||||||
}
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
||||||
response = await client.post(
|
|
||||||
"https://api.github.com/graphql",
|
|
||||||
json={"query": query, "variables": variables},
|
|
||||||
headers={
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"User-Agent": "PyPI-Security-Scanner/1.0"
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code == 200:
|
|
||||||
data = response.json()
|
|
||||||
vulnerabilities = []
|
|
||||||
|
|
||||||
for vuln_node in data.get("data", {}).get("securityVulnerabilities", {}).get("nodes", []):
|
|
||||||
advisory = vuln_node.get("advisory", {})
|
|
||||||
|
|
||||||
# Check if current version is affected
|
|
||||||
if self._is_version_affected(version, vuln_node.get("vulnerableVersionRange", "")):
|
|
||||||
vulnerabilities.append({
|
|
||||||
"id": advisory.get("ghsaId", ""),
|
|
||||||
"summary": advisory.get("summary", ""),
|
|
||||||
"details": advisory.get("description", ""),
|
|
||||||
"severity": advisory.get("severity", "").lower(),
|
|
||||||
"published": advisory.get("publishedAt", ""),
|
|
||||||
"modified": advisory.get("updatedAt", ""),
|
|
||||||
"source": "GitHub",
|
|
||||||
"references": [ref.get("url", "") for ref in advisory.get("references", [])],
|
|
||||||
"vulnerable_range": vuln_node.get("vulnerableVersionRange", ""),
|
|
||||||
"first_patched": vuln_node.get("firstPatchedVersion", {}).get("identifier", ""),
|
|
||||||
})
|
|
||||||
|
|
||||||
return {"vulnerabilities": vulnerabilities, "source": "GitHub"}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"GitHub advisories scan failed: {e}")
|
|
||||||
|
|
||||||
return {"vulnerabilities": [], "source": "GitHub"}
|
|
||||||
|
|
||||||
    async def _analyze_package_metadata(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze package metadata for security indicators."""
        info = package_data.get("info", {})

        security_indicators = {
            "metadata_score": 0,
            "risk_factors": [],
            "security_features": [],
            "warnings": []
        }

        # Check for security-related information
        description = (info.get("description") or "").lower()
        summary = (info.get("summary") or "").lower()
        keywords = (info.get("keywords") or "").lower()

        combined_text = f"{description} {summary} {keywords}"

        # Look for security mentions
        if any(term in combined_text for term in ["security", "cryptography", "authentication", "encryption"]):
            security_indicators["security_features"].append("Contains security-related functionality")
            security_indicators["metadata_score"] += 20

        # Check for high-risk patterns
        for pattern in self.high_risk_patterns:
            if pattern in combined_text:
                security_indicators["risk_factors"].append(f"Mentions: {pattern}")
                security_indicators["metadata_score"] -= 10

        # Check maintainer and project signals
        if info.get("author_email"):
            security_indicators["metadata_score"] += 10

        if info.get("home_page"):
            security_indicators["metadata_score"] += 5

        # Check for classifiers
        classifiers = info.get("classifiers", [])
        for classifier in classifiers:
            if "Development Status :: 5 - Production/Stable" in classifier:
                security_indicators["metadata_score"] += 15
                security_indicators["security_features"].append("Production stable status")
            elif "License ::" in classifier:
                security_indicators["metadata_score"] += 5

        # Check for suspicious patterns
        if not info.get("author") and not info.get("maintainer"):
            security_indicators["warnings"].append("No author or maintainer information")
            security_indicators["metadata_score"] -= 20

        if len(info.get("description") or "") < 50:
            security_indicators["warnings"].append("Very brief or missing description")
            security_indicators["metadata_score"] -= 10

        return security_indicators
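    # Worked illustration (made-up package): metadata with an author email (+10),
    # a homepage (+5), a "Production/Stable" classifier (+15) and a license
    # classifier (+5), but only a 20-character description (-10), nets a
    # metadata_score of 25 with one warning recorded.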
    async def _check_dependency_vulnerabilities(self, package_name: str, version: str) -> Dict[str, Any]:
        """Check vulnerabilities in package dependencies."""
        try:
            # Get package dependencies
            async with PyPIClient() as client:
                package_data = await client.get_package_info(package_name, version)

            # Extract dependencies
            requires_dist = package_data.get("info", {}).get("requires_dist", []) or []
            dependencies = []

            for req in requires_dist:
                # Parse the dependency name (simplified; see the sketch below for a robust parser)
                dep_name = req.split()[0].split(">=")[0].split("==")[0].split("~=")[0].split("!=")[0]
                if dep_name and not dep_name.startswith("extra"):
                    dependencies.append(dep_name)

            # Scan top dependencies for vulnerabilities
            dependency_vulnerabilities = []

            # Limit to the first 10 dependencies to avoid overwhelming the system
            for dep_name in dependencies[:10]:
                try:
                    dep_scan = await self._scan_osv_database(dep_name, "latest")
                    for vuln in dep_scan.get("vulnerabilities", []):
                        vuln["dependency"] = dep_name
                        vuln["type"] = "dependency_vulnerability"
                        dependency_vulnerabilities.append(vuln)
                except Exception as e:
                    logger.debug(f"Failed to scan dependency {dep_name}: {e}")

            return {"vulnerabilities": dependency_vulnerabilities, "source": "dependencies"}

        except Exception as e:
            logger.warning(f"Dependency vulnerability check failed: {e}")
            return {"vulnerabilities": [], "source": "dependencies"}
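    # The string-splitting parser above mishandles extras and environment markers
    # (e.g. 'foo[security]>=1.0; python_version < "3.11"'). A sketch of a robust
    # alternative using packaging.requirements, assuming the same requires_dist
    # input (the helper name is illustrative, not part of this module):
    @staticmethod
    def _dependency_names(requires_dist: List[str]) -> List[str]:
        """Hypothetical PEP 508-aware parser for requirement strings."""
        from packaging.requirements import InvalidRequirement, Requirement

        names = []
        for req in requires_dist:
            try:
                requirement = Requirement(req)
            except InvalidRequirement:
                continue
            # Skip requirements that only apply when an extra is requested.
            if requirement.marker and "extra" in str(requirement.marker):
                continue
            names.append(requirement.name)
        return names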
    async def _empty_result(self) -> Dict[str, Any]:
        """Return empty result for disabled scans."""
        return {"vulnerabilities": [], "source": "disabled"}
    def _extract_severity_from_osv(self, vuln_data: Dict[str, Any]) -> str:
        """Extract severity from OSV vulnerability data."""
        # OSV reports CVSS data; map the numeric score to common severity levels
        severity_data = vuln_data.get("severity", [])
        if severity_data:
            score = severity_data[0].get("score", "")
            if "CVSS:" in score:
                # Extract the CVSS score. Note: OSV usually stores the full CVSS
                # vector string here, in which case this parse fails and we fall
                # back to "unknown".
                try:
                    cvss_score = float(score.split("/")[1])
                    if cvss_score >= 9.0:
                        return "critical"
                    elif cvss_score >= 7.0:
                        return "high"
                    elif cvss_score >= 4.0:
                        return "medium"
                    else:
                        return "low"
                except (IndexError, ValueError):
                    pass

        return "unknown"
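    # Since the OSV score field typically holds a full vector such as
    # "CVSS:3.1/AV:N/AC:L/...", computing a base score needs a CVSS parser. A
    # sketch using the third-party `cvss` package - an assumption, it is not a
    # dependency of this project:
    @staticmethod
    def _severity_from_vector(vector: str) -> str:
        """Hypothetical: map a CVSS v3 vector to the severity buckets above."""
        from cvss import CVSS3  # assumed extra dependency

        base_score = CVSS3(vector).scores()[0]  # (base, temporal, environmental)
        if base_score >= 9.0:
            return "critical"
        elif base_score >= 7.0:
            return "high"
        elif base_score >= 4.0:
            return "medium"
        return "low"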
    def _extract_affected_versions(self, vuln_data: Dict[str, Any]) -> List[str]:
        """Extract affected version ranges from vulnerability data."""
        affected = vuln_data.get("affected", [])
        version_ranges = []

        for affect in affected:
            ranges = affect.get("ranges", [])
            for range_data in ranges:
                events = range_data.get("events", [])
                for event in events:
                    if "introduced" in event:
                        version_ranges.append(f">= {event['introduced']}")
                    elif "fixed" in event:
                        version_ranges.append(f"< {event['fixed']}")

        return version_ranges

    def _extract_fixed_versions(self, vuln_data: Dict[str, Any]) -> List[str]:
        """Extract fixed versions from vulnerability data."""
        affected = vuln_data.get("affected", [])
        fixed_versions = []

        for affect in affected:
            ranges = affect.get("ranges", [])
            for range_data in ranges:
                events = range_data.get("events", [])
                for event in events:
                    if "fixed" in event:
                        fixed_versions.append(event["fixed"])

        return fixed_versions
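    # For reference, both extractors above walk the OSV "affected" structure.
    # A minimal sketch of the record shape they expect (field names follow the
    # public OSV schema; the concrete versions are made up):
    #
    # sample_osv_record = {
    #     "affected": [
    #         {
    #             "ranges": [
    #                 {
    #                     "type": "ECOSYSTEM",
    #                     "events": [
    #                         {"introduced": "0"},
    #                         {"fixed": "2.31.0"},
    #                     ],
    #                 }
    #             ]
    #         }
    #     ]
    # }
    # _extract_affected_versions(sample_osv_record) -> [">= 0", "< 2.31.0"]
    # _extract_fixed_versions(sample_osv_record)    -> ["2.31.0"]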
    def _is_version_affected(self, version: str, vulnerable_range: str) -> bool:
        """Check if a version is affected by a vulnerability range."""
        # Simplified lexicographic check - in production use packaging.specifiers
        # (see the sketch below)
        if not vulnerable_range:
            return True

        # Basic patterns
        if "< " in vulnerable_range:
            try:
                limit = vulnerable_range.split("< ")[1].strip()
                return version < limit
            except IndexError:
                pass

        if ">= " in vulnerable_range:
            try:
                limit = vulnerable_range.split(">= ")[1].strip()
                return version >= limit
            except IndexError:
                pass

        return True  # Assume affected if we can't parse
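    # The string comparison above misorders versions like "10.0" vs "9.0". A
    # sketch of a stricter check using the `packaging` library already listed in
    # pyproject.toml (the method name is illustrative):
    @staticmethod
    def _is_version_affected_strict(version: str, vulnerable_range: str) -> bool:
        """Hypothetical replacement using real PEP 440 version semantics."""
        from packaging.specifiers import InvalidSpecifier, SpecifierSet
        from packaging.version import InvalidVersion, Version

        if not vulnerable_range:
            return True
        try:
            # GitHub ranges such as ">= 1.0, < 2.1" become valid specifier sets
            # once the spaces are normalized away.
            spec = SpecifierSet(vulnerable_range.replace(" ", ""))
            return Version(version) in spec
        except (InvalidSpecifier, InvalidVersion):
            return True  # Conservatively assume affected when unparsable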
    def _generate_security_report(
        self,
        package_name: str,
        version: str,
        vulnerabilities: List[Dict[str, Any]],
        metadata_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate comprehensive security report."""

        # Categorize vulnerabilities by severity
        severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0}
        dependency_vulns = []
        direct_vulns = []

        for vuln in vulnerabilities:
            severity = vuln.get("severity", "unknown")
            severity_counts[severity] = severity_counts.get(severity, 0) + 1

            if vuln.get("type") == "dependency_vulnerability":
                dependency_vulns.append(vuln)
            else:
                direct_vulns.append(vuln)

        # Calculate risk score
        risk_score = self._calculate_risk_score(severity_counts, metadata_analysis)

        # Generate recommendations
        recommendations = self._generate_security_recommendations(
            vulnerabilities, metadata_analysis, risk_score
        )

        return {
            "package": package_name,
            "version": version,
            "scan_timestamp": datetime.now(timezone.utc).isoformat(),
            "security_summary": {
                "total_vulnerabilities": len(vulnerabilities),
                "direct_vulnerabilities": len(direct_vulns),
                "dependency_vulnerabilities": len(dependency_vulns),
                "severity_breakdown": severity_counts,
                "risk_score": risk_score,
                "risk_level": self._get_risk_level(risk_score),
            },
            "vulnerabilities": {
                "direct": direct_vulns,
                "dependencies": dependency_vulns,
            },
            "metadata_analysis": metadata_analysis,
            "recommendations": recommendations,
            "scan_details": {
                "sources_checked": ["OSV", "GitHub", "Metadata"],
                "dependencies_scanned": len(dependency_vulns) > 0,
                "scan_completion": "success",
            }
        }
    def _calculate_risk_score(self, severity_counts: Dict[str, int], metadata_analysis: Dict[str, Any]) -> float:
        """Calculate overall risk score (0-100)."""
        score = 0.0

        # Vulnerability scoring (0-80 points)
        score += severity_counts.get("critical", 0) * 20
        score += severity_counts.get("high", 0) * 15
        score += severity_counts.get("medium", 0) * 8
        score += severity_counts.get("low", 0) * 3

        # Metadata scoring (0-20 points)
        metadata_score = metadata_analysis.get("metadata_score", 0)
        if metadata_score < 0:
            score += abs(metadata_score) / 5  # Convert negative metadata score to risk
        else:
            score -= metadata_score / 10  # Good metadata reduces risk

        # Cap at 100
        return min(max(score, 0), 100)
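    # Worked example (illustrative): one critical and two medium findings with a
    # metadata_score of -10 give 20 + 16 + 2 = 38 points, which the banding in
    # _get_risk_level below maps to "medium".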
    def _get_risk_level(self, risk_score: float) -> str:
        """Convert risk score to risk level."""
        if risk_score >= 80:
            return "critical"
        elif risk_score >= 60:
            return "high"
        elif risk_score >= 30:
            return "medium"
        elif risk_score > 0:
            return "low"
        else:
            return "minimal"
    def _generate_security_recommendations(
        self,
        vulnerabilities: List[Dict[str, Any]],
        metadata_analysis: Dict[str, Any],
        risk_score: float
    ) -> List[str]:
        """Generate actionable security recommendations."""
        recommendations = []

        if len(vulnerabilities) > 0:
            recommendations.append(f"🚨 Found {len(vulnerabilities)} security vulnerabilities - review and update immediately")

        # Check for critical/high severity
        critical_high = [v for v in vulnerabilities if v.get("severity") in ["critical", "high"]]
        if critical_high:
            recommendations.append(f"⚠️ {len(critical_high)} critical/high severity vulnerabilities require immediate attention")

        # Check for fixed versions
        fixed_versions = []
        for vuln in vulnerabilities:
            fixed = vuln.get("fixed_versions", []) or [vuln.get("first_patched", "")]
            fixed_versions.extend([v for v in fixed if v])

        if fixed_versions:
            latest_fixed = max(fixed_versions) if fixed_versions else None
            if latest_fixed:
                recommendations.append(f"📦 Update to version {latest_fixed} or later to fix known vulnerabilities")

        # Metadata recommendations
        warnings = metadata_analysis.get("warnings", [])
        if warnings:
            recommendations.append(f"⚠️ Package metadata issues: {', '.join(warnings)}")

        if metadata_analysis.get("metadata_score", 0) < 20:
            recommendations.append("📝 Package has poor metadata quality - verify trustworthiness before use")

        # General recommendations based on risk score
        if risk_score >= 60:
            recommendations.append("🛑 High risk package - consider alternatives or additional security review")
        elif risk_score >= 30:
            recommendations.append("⚠️ Moderate risk - monitor for updates and security patches")
        elif len(vulnerabilities) == 0:
            recommendations.append("✅ No known vulnerabilities found - package appears secure")

        return recommendations


# Main scanning functions
async def scan_package_security(
    package_name: str,
    version: Optional[str] = None,
    include_dependencies: bool = True,
    severity_filter: Optional[str] = None
) -> Dict[str, Any]:
    """
    Scan a PyPI package for security vulnerabilities.

    Args:
        package_name: Name of the package to scan
        version: Specific version to scan (optional)
        include_dependencies: Whether to scan dependencies
        severity_filter: Filter by severity (low, medium, high, critical)

    Returns:
        Comprehensive security scan results
    """
    scanner = VulnerabilityScanner()
    return await scanner.scan_package(
        package_name, version, include_dependencies, severity_filter
    )
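# A minimal usage sketch for the function above (the package name is just an
# example; in this project the function is normally invoked through the MCP
# tool layer rather than directly):
#
#     import asyncio
#
#     async def _demo_scan() -> None:  # hypothetical helper
#         report = await scan_package_security("requests", include_dependencies=False)
#         summary = report["security_summary"]
#         print(f"{summary['risk_level']}: {summary['total_vulnerabilities']} vulnerabilities")
#
#     asyncio.run(_demo_scan())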
async def bulk_security_scan(
    package_names: List[str],
    include_dependencies: bool = False,
    severity_threshold: str = "medium"
) -> Dict[str, Any]:
    """
    Perform bulk security scanning of multiple packages.

    Args:
        package_names: List of package names to scan
        include_dependencies: Whether to scan dependencies
        severity_threshold: Minimum severity to report

    Returns:
        Bulk scan results with summary
    """
    logger.info(f"Starting bulk security scan of {len(package_names)} packages")

    scanner = VulnerabilityScanner()
    scan_results = {}
    summary = {
        "total_packages": len(package_names),
        "packages_with_vulnerabilities": 0,
        "total_vulnerabilities": 0,
        "high_risk_packages": [],
        "scan_timestamp": datetime.now(timezone.utc).isoformat()
    }

    # Scan packages in parallel batches
    batch_size = 5
    for i in range(0, len(package_names), batch_size):
        batch = package_names[i:i + batch_size]
        batch_tasks = [
            scanner.scan_package(pkg_name, include_dependencies=include_dependencies)
            for pkg_name in batch
        ]

        batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

        for pkg_name, result in zip(batch, batch_results):
            if isinstance(result, Exception):
                scan_results[pkg_name] = {
                    "error": str(result),
                    "scan_status": "failed"
                }
            else:
                scan_results[pkg_name] = result

                # Update summary
                vuln_count = result.get("security_summary", {}).get("total_vulnerabilities", 0)
                if vuln_count > 0:
                    summary["packages_with_vulnerabilities"] += 1
                    summary["total_vulnerabilities"] += vuln_count

                risk_level = result.get("security_summary", {}).get("risk_level", "")
                if risk_level in ["high", "critical"]:
                    summary["high_risk_packages"].append({
                        "package": pkg_name,
                        "risk_level": risk_level,
                        "vulnerabilities": vuln_count
                    })

    return {
        "summary": summary,
        "detailed_results": scan_results,
        "recommendations": _generate_bulk_recommendations(summary, scan_results)
    }
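# Design note: the fixed-size batches above wait for the slowest package in
# each batch before starting the next one. A per-task semaphore is a common
# alternative that keeps five scans in flight continuously. A sketch, assuming
# the same VulnerabilityScanner API (names are illustrative):
#
#     async def _scan_all_with_semaphore(scanner, package_names, limit: int = 5):
#         semaphore = asyncio.Semaphore(limit)
#
#         async def scan_one(name: str):
#             async with semaphore:
#                 return await scanner.scan_package(name)
#
#         return await asyncio.gather(
#             *(scan_one(name) for name in package_names), return_exceptions=True
#         )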
def _generate_bulk_recommendations(summary: Dict[str, Any], results: Dict[str, Any]) -> List[str]:
    """Generate recommendations for bulk scan results."""
    recommendations = []

    vuln_packages = summary["packages_with_vulnerabilities"]
    total_packages = summary["total_packages"]

    if vuln_packages == 0:
        recommendations.append("✅ No security vulnerabilities found in any scanned packages")
    else:
        percentage = (vuln_packages / total_packages) * 100
        recommendations.append(
            f"🚨 {vuln_packages}/{total_packages} packages ({percentage:.1f}%) have security vulnerabilities"
        )

    high_risk = summary["high_risk_packages"]
    if high_risk:
        recommendations.append(
            f"⚠️ {len(high_risk)} packages are high/critical risk: {', '.join([p['package'] for p in high_risk])}"
        )
        recommendations.append("🛑 Priority: Address high-risk packages immediately")

    if summary["total_vulnerabilities"] > 0:
        recommendations.append(f"📊 Total vulnerabilities found: {summary['total_vulnerabilities']}")
        recommendations.append("🔍 Review detailed results and update affected packages")

    return recommendations
@ -1,147 +0,0 @@
"""Security vulnerability scanning tools for PyPI packages."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
|
|
||||||
from ..tools.security import bulk_security_scan, scan_package_security
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def scan_pypi_package_security(
|
|
||||||
package_name: str,
|
|
||||||
version: Optional[str] = None,
|
|
||||||
include_dependencies: bool = True,
|
|
||||||
severity_filter: Optional[str] = None
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Scan a PyPI package for security vulnerabilities.
|
|
||||||
|
|
||||||
This tool performs comprehensive security vulnerability scanning of PyPI packages,
|
|
||||||
checking against multiple vulnerability databases including OSV (Open Source Vulnerabilities),
|
|
||||||
GitHub Security Advisories, and analyzing package metadata for security indicators.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_name: Name of the package to scan for vulnerabilities
|
|
||||||
version: Specific version to scan (optional, defaults to latest version)
|
|
||||||
include_dependencies: Whether to scan package dependencies for vulnerabilities
|
|
||||||
severity_filter: Filter results by severity level (low, medium, high, critical)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing comprehensive security scan results including:
|
|
||||||
- Total vulnerability count and severity breakdown
|
|
||||||
- Direct package vulnerabilities vs dependency vulnerabilities
|
|
||||||
- Risk score and level assessment (minimal, low, medium, high, critical)
|
|
||||||
- Detailed vulnerability information with IDs, descriptions, and references
|
|
||||||
- Package metadata security analysis
|
|
||||||
- Actionable security recommendations
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
InvalidPackageNameError: If package name is empty or invalid
|
|
||||||
PackageNotFoundError: If package is not found on PyPI
|
|
||||||
NetworkError: For network-related errors
|
|
||||||
SearchError: If security scanning fails
|
|
||||||
"""
|
|
||||||
if not package_name or not package_name.strip():
|
|
||||||
raise InvalidPackageNameError(package_name)
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Scanning security for package {package_name}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await scan_package_security(
|
|
||||||
package_name=package_name,
|
|
||||||
version=version,
|
|
||||||
include_dependencies=include_dependencies,
|
|
||||||
severity_filter=severity_filter
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Security scan completed for {package_name} - found {result.get('security_summary', {}).get('total_vulnerabilities', 0)} vulnerabilities")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except (InvalidPackageNameError, NetworkError, SearchError) as e:
|
|
||||||
logger.error(f"Error scanning security for {package_name}: {e}")
|
|
||||||
return {
|
|
||||||
"error": f"Security scan failed: {e}",
|
|
||||||
"error_type": type(e).__name__,
|
|
||||||
"package": package_name,
|
|
||||||
"version": version,
|
|
||||||
"scan_timestamp": "",
|
|
||||||
"security_summary": {
|
|
||||||
"total_vulnerabilities": 0,
|
|
||||||
"direct_vulnerabilities": 0,
|
|
||||||
"dependency_vulnerabilities": 0,
|
|
||||||
"severity_breakdown": {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0},
|
|
||||||
"risk_score": 0,
|
|
||||||
"risk_level": "unknown",
|
|
||||||
},
|
|
||||||
"vulnerabilities": {"direct": [], "dependencies": []},
|
|
||||||
"metadata_analysis": {},
|
|
||||||
"recommendations": [f"❌ Security scan failed: {e}"],
|
|
||||||
"scan_details": {
|
|
||||||
"sources_checked": [],
|
|
||||||
"dependencies_scanned": False,
|
|
||||||
"scan_completion": "error",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def bulk_scan_package_security(
|
|
||||||
package_names: List[str],
|
|
||||||
include_dependencies: bool = False,
|
|
||||||
severity_threshold: str = "medium"
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Perform bulk security scanning of multiple PyPI packages.
|
|
||||||
|
|
||||||
This tool scans multiple packages simultaneously for security vulnerabilities,
|
|
||||||
providing a consolidated report with summary statistics and prioritized
|
|
||||||
recommendations for addressing security issues across your package ecosystem.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
package_names: List of package names to scan for vulnerabilities
|
|
||||||
include_dependencies: Whether to include dependency vulnerability scanning
|
|
||||||
severity_threshold: Minimum severity level to report (low, medium, high, critical)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing bulk scan results including:
|
|
||||||
- Summary statistics (total packages, packages with vulnerabilities, high-risk packages)
|
|
||||||
- Detailed scan results for each package
|
|
||||||
- Prioritized recommendations for security remediation
|
|
||||||
- Scan timestamp and completion status
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If package_names list is empty
|
|
||||||
NetworkError: For network-related errors during scanning
|
|
||||||
SearchError: If bulk scanning fails
|
|
||||||
"""
|
|
||||||
if not package_names:
|
|
||||||
raise ValueError("Package names list cannot be empty")
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Starting bulk security scan of {len(package_names)} packages")
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await bulk_security_scan(
|
|
||||||
package_names=package_names,
|
|
||||||
include_dependencies=include_dependencies,
|
|
||||||
severity_threshold=severity_threshold
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"MCP tool: Bulk security scan completed - {result.get('summary', {}).get('packages_with_vulnerabilities', 0)} packages have vulnerabilities")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except (ValueError, NetworkError, SearchError) as e:
|
|
||||||
logger.error(f"Error in bulk security scan: {e}")
|
|
||||||
return {
|
|
||||||
"error": f"Bulk security scan failed: {e}",
|
|
||||||
"error_type": type(e).__name__,
|
|
||||||
"summary": {
|
|
||||||
"total_packages": len(package_names),
|
|
||||||
"packages_with_vulnerabilities": 0,
|
|
||||||
"total_vulnerabilities": 0,
|
|
||||||
"high_risk_packages": [],
|
|
||||||
"scan_timestamp": ""
|
|
||||||
},
|
|
||||||
"detailed_results": {},
|
|
||||||
"recommendations": [f"❌ Bulk security scan failed: {e}"]
|
|
||||||
}
|
|
File diff suppressed because it is too large
@ -35,7 +35,6 @@ packaging = "^24.0"
pydantic = "^2.0.0"
pydantic-settings = "^2.0.0"
click = "8.1.7"
feedparser = "^6.0.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
@ -1,985 +0,0 @@
|
|||||||
"""Tests for PyPI community and social tools functionality."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
from datetime import datetime
|
|
||||||
from unittest.mock import AsyncMock, patch, MagicMock
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from pypi_query_mcp.core.exceptions import InvalidPackageNameError, PackageNotFoundError, NetworkError
|
|
||||||
from pypi_query_mcp.tools.community import (
|
|
||||||
get_pypi_package_reviews,
|
|
||||||
manage_pypi_package_discussions,
|
|
||||||
get_pypi_maintainer_contacts,
|
|
||||||
_analyze_github_community_sentiment,
|
|
||||||
_check_stackoverflow_mentions,
|
|
||||||
_analyze_pypi_downloads_as_quality_indicator,
|
|
||||||
_get_community_health_metrics,
|
|
||||||
_calculate_community_score,
|
|
||||||
_generate_community_insights,
|
|
||||||
_extract_contact_info_from_metadata,
|
|
||||||
_find_github_repository,
|
|
||||||
_parse_github_url,
|
|
||||||
_analyze_issue_sentiment,
|
|
||||||
_analyze_stackoverflow_sentiment,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPyPIPackageReviews:
|
|
||||||
"""Test community reviews and feedback functionality."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_package_data(self):
|
|
||||||
"""Mock package data for testing."""
|
|
||||||
return {
|
|
||||||
"info": {
|
|
||||||
"name": "test-package",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"summary": "A test package for community analysis",
|
|
||||||
"description": "A comprehensive test package with detailed description for community testing",
|
|
||||||
"keywords": "test, community, package",
|
|
||||||
"classifiers": [
|
|
||||||
"Development Status :: 4 - Beta",
|
|
||||||
"Intended Audience :: Developers",
|
|
||||||
"License :: OSI Approved :: MIT License",
|
|
||||||
"Programming Language :: Python :: 3",
|
|
||||||
"Topic :: Software Development :: Libraries",
|
|
||||||
],
|
|
||||||
"license": "MIT",
|
|
||||||
"author": "Test Author",
|
|
||||||
"home_page": "https://example.com",
|
|
||||||
"project_urls": {
|
|
||||||
"Documentation": "https://docs.example.com",
|
|
||||||
"Repository": "https://github.com/test/test-package",
|
|
||||||
"Bug Reports": "https://github.com/test/test-package/issues",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_github_sentiment(self):
|
|
||||||
"""Mock GitHub sentiment analysis data."""
|
|
||||||
return {
|
|
||||||
"repository": "https://github.com/test/test-package",
|
|
||||||
"sentiment_analysis": {
|
|
||||||
"overall_sentiment_score": 75.5,
|
|
||||||
"issues_analyzed": 20,
|
|
||||||
"positive_indicators": 15,
|
|
||||||
"negative_indicators": 5,
|
|
||||||
"sentiment_factors": {
|
|
||||||
"closed_issues": 12,
|
|
||||||
"open_issues": 8,
|
|
||||||
"enhancement_requests": 5,
|
|
||||||
"bug_reports": 3,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"repository_stats": {
|
|
||||||
"stargazers_count": 150,
|
|
||||||
"forks_count": 25,
|
|
||||||
"open_issues_count": 8,
|
|
||||||
},
|
|
||||||
"issues_analyzed": 20,
|
|
||||||
"analysis_timestamp": datetime.now().isoformat(),
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_stackoverflow_data(self):
|
|
||||||
"""Mock Stack Overflow mentions data."""
|
|
||||||
return {
|
|
||||||
"questions_found": 5,
|
|
||||||
"sentiment_analysis": {
|
|
||||||
"overall_sentiment_score": 65.0,
|
|
||||||
"questions_analyzed": 5,
|
|
||||||
"positive_indicators": 3,
|
|
||||||
"negative_indicators": 2,
|
|
||||||
"question_characteristics": {
|
|
||||||
"answered_questions": 4,
|
|
||||||
"unanswered_questions": 1,
|
|
||||||
"average_score": 2.4,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"search_timestamp": datetime.now().isoformat(),
|
|
||||||
"data_source": "Stack Overflow API",
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_quality_indicators(self):
|
|
||||||
"""Mock quality indicators data."""
|
|
||||||
return {
|
|
||||||
"download_stats": {
|
|
||||||
"last_month": 50000,
|
|
||||||
"last_week": 12000,
|
|
||||||
"last_day": 2000,
|
|
||||||
},
|
|
||||||
"adoption_level": "moderate",
|
|
||||||
"quality_indicator_score": 50.0,
|
|
||||||
"analysis_timestamp": datetime.now().isoformat(),
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_community_health(self):
|
|
||||||
"""Mock community health metrics."""
|
|
||||||
return {
|
|
||||||
"github_community_health": {
|
|
||||||
"health_percentage": 85,
|
|
||||||
"documentation": {"exists": True},
|
|
||||||
"contributing": {"exists": True},
|
|
||||||
"code_of_conduct": {"exists": True},
|
|
||||||
"license": {"exists": True},
|
|
||||||
"readme": {"exists": True},
|
|
||||||
},
|
|
||||||
"has_repository": True,
|
|
||||||
"repository_url": "https://github.com/test/test-package",
|
|
||||||
}
|
|
||||||
|
|
||||||
async def test_get_pypi_package_reviews_success(
|
|
||||||
self,
|
|
||||||
mock_package_data,
|
|
||||||
mock_github_sentiment,
|
|
||||||
mock_stackoverflow_data,
|
|
||||||
mock_quality_indicators,
|
|
||||||
mock_community_health
|
|
||||||
):
|
|
||||||
"""Test successful retrieval of package reviews."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment") as mock_github, \
|
|
||||||
patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_stackoverflow, \
|
|
||||||
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data["info"]
|
|
||||||
mock_github.return_value = mock_github_sentiment
|
|
||||||
mock_stackoverflow.return_value = mock_stackoverflow_data
|
|
||||||
mock_quality.return_value = mock_quality_indicators
|
|
||||||
mock_health.return_value = mock_community_health
|
|
||||||
|
|
||||||
result = await get_pypi_package_reviews(
|
|
||||||
package_name="test-package",
|
|
||||||
include_ratings=True,
|
|
||||||
include_community_feedback=True,
|
|
||||||
sentiment_analysis=True,
|
|
||||||
max_reviews=50
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert "community_score" in result
|
|
||||||
assert "metadata" in result
|
|
||||||
assert "community_health" in result
|
|
||||||
assert "quality_indicators" in result
|
|
||||||
assert "insights" in result
|
|
||||||
assert "review_system_status" in result
|
|
||||||
assert "github_community_feedback" in result
|
|
||||||
assert "stackoverflow_mentions" in result
|
|
||||||
assert "sentiment_analysis" in result
|
|
||||||
assert "ratings" in result
|
|
||||||
|
|
||||||
# Check community score structure
|
|
||||||
community_score = result["community_score"]
|
|
||||||
assert "overall_score" in community_score
|
|
||||||
assert "community_status" in community_score
|
|
||||||
assert "score_components" in community_score
|
|
||||||
|
|
||||||
# Check review system status
|
|
||||||
review_status = result["review_system_status"]
|
|
||||||
assert review_status["native_pypi_reviews"] == "not_available"
|
|
||||||
assert review_status["future_ready"] is True
|
|
||||||
|
|
||||||
async def test_get_pypi_package_reviews_invalid_package_name(self):
|
|
||||||
"""Test handling of invalid package name."""
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await get_pypi_package_reviews("")
|
|
||||||
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await get_pypi_package_reviews(" ")
|
|
||||||
|
|
||||||
async def test_get_pypi_package_reviews_minimal_options(self, mock_package_data):
|
|
||||||
"""Test reviews with minimal options enabled."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data["info"]
|
|
||||||
mock_quality.return_value = {"quality_indicator_score": 30}
|
|
||||||
mock_health.return_value = {"has_repository": False}
|
|
||||||
|
|
||||||
result = await get_pypi_package_reviews(
|
|
||||||
package_name="test-package",
|
|
||||||
include_ratings=False,
|
|
||||||
include_community_feedback=False,
|
|
||||||
sentiment_analysis=False
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert "github_community_feedback" not in result
|
|
||||||
assert "stackoverflow_mentions" not in result
|
|
||||||
assert "sentiment_analysis" not in result
|
|
||||||
assert "ratings" not in result
|
|
||||||
|
|
||||||
async def test_get_pypi_package_reviews_network_error(self):
|
|
||||||
"""Test handling of network errors."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews", side_effect=NetworkError("Network error")):
|
|
||||||
with pytest.raises(NetworkError):
|
|
||||||
await get_pypi_package_reviews("test-package")
|
|
||||||
|
|
||||||
|
|
||||||
class TestManagePyPIPackageDiscussions:
|
|
||||||
"""Test package discussions management functionality."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_package_data(self):
|
|
||||||
"""Mock package data for discussions testing."""
|
|
||||||
return {
|
|
||||||
"info": {
|
|
||||||
"name": "test-package",
|
|
||||||
"project_urls": {
|
|
||||||
"Repository": "https://github.com/test/test-package",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_discussion_status(self):
|
|
||||||
"""Mock current discussion status."""
|
|
||||||
return {
|
|
||||||
"github_discussions": {
|
|
||||||
"enabled": False,
|
|
||||||
"reason": "requires_github_api_integration",
|
|
||||||
"repository": "https://github.com/test/test-package",
|
|
||||||
},
|
|
||||||
"community_platforms": {
|
|
||||||
"discord": {"available": False},
|
|
||||||
"reddit": {"available": False},
|
|
||||||
"forums": {"available": False},
|
|
||||||
},
|
|
||||||
"native_pypi_discussions": {
|
|
||||||
"available": False,
|
|
||||||
"note": "PyPI does not currently support native discussions",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
async def test_manage_discussions_get_status(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test getting discussion status."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="get_status"
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "get_status"
|
|
||||||
assert "status" in result
|
|
||||||
assert "current_discussion_status" in result
|
|
||||||
assert "available_platforms" in result
|
|
||||||
assert "discussion_system_status" in result
|
|
||||||
|
|
||||||
# Check system status
|
|
||||||
system_status = result["discussion_system_status"]
|
|
||||||
assert system_status["native_pypi_discussions"] == "not_available"
|
|
||||||
assert system_status["future_ready"] is True
|
|
||||||
|
|
||||||
async def test_manage_discussions_enable(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test enabling discussions."""
|
|
||||||
discussion_settings = {
|
|
||||||
"categories": ["General", "Q&A", "Ideas"],
|
|
||||||
"moderation": "manual_review",
|
|
||||||
}
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="enable",
|
|
||||||
discussion_settings=discussion_settings
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "enable"
|
|
||||||
assert result["status"] == "configured"
|
|
||||||
assert result["action"] == "enable_discussions"
|
|
||||||
assert "settings_applied" in result
|
|
||||||
assert "next_steps" in result
|
|
||||||
|
|
||||||
async def test_manage_discussions_disable(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test disabling discussions."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="disable"
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "disable"
|
|
||||||
assert result["status"] == "configured"
|
|
||||||
assert result["action"] == "disable_discussions"
|
|
||||||
assert "next_steps" in result
|
|
||||||
|
|
||||||
async def test_manage_discussions_configure(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test configuring discussions."""
|
|
||||||
discussion_settings = {
|
|
||||||
"categories": ["General", "Q&A", "Ideas", "Show and Tell"],
|
|
||||||
"moderation": "community_moderation",
|
|
||||||
"notifications": ["email_notifications", "web_notifications"],
|
|
||||||
}
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="configure",
|
|
||||||
discussion_settings=discussion_settings
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "configure"
|
|
||||||
assert result["status"] == "configured"
|
|
||||||
assert result["action"] == "configure_discussions"
|
|
||||||
assert "configuration_options" in result
|
|
||||||
|
|
||||||
async def test_manage_discussions_moderate(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test moderating discussions."""
|
|
||||||
moderator_controls = {
|
|
||||||
"content_filtering": True,
|
|
||||||
"auto_moderation": True,
|
|
||||||
"moderator_roles": ["owner", "maintainer"],
|
|
||||||
}
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="moderate",
|
|
||||||
moderator_controls=moderator_controls
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "moderate"
|
|
||||||
assert result["status"] == "moderation_configured"
|
|
||||||
assert result["action"] == "moderate_discussions"
|
|
||||||
assert "moderation_features" in result
|
|
||||||
|
|
||||||
async def test_manage_discussions_get_metrics(self, mock_package_data, mock_discussion_status):
|
|
||||||
"""Test getting discussion metrics."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_data
|
|
||||||
mock_status.return_value = mock_discussion_status
|
|
||||||
|
|
||||||
result = await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="get_metrics"
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert result["action_performed"] == "get_metrics"
|
|
||||||
assert result["status"] == "metrics_retrieved"
|
|
||||||
assert "github_metrics" in result
|
|
||||||
assert "overall_engagement" in result
|
|
||||||
|
|
||||||
async def test_manage_discussions_invalid_action(self):
|
|
||||||
"""Test handling of invalid action."""
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await manage_pypi_package_discussions(
|
|
||||||
package_name="test-package",
|
|
||||||
action="invalid_action"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def test_manage_discussions_invalid_package_name(self):
|
|
||||||
"""Test handling of invalid package name."""
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await manage_pypi_package_discussions("")
|
|
||||||
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await manage_pypi_package_discussions(" ")
|
|
||||||
|
|
||||||
|
|
||||||
class TestGetPyPIMaintainerContacts:
|
|
||||||
"""Test maintainer contact information functionality."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_package_metadata(self):
|
|
||||||
"""Mock package metadata for contact testing."""
|
|
||||||
return {
|
|
||||||
"name": "test-package",
|
|
||||||
"author": "Test Author",
|
|
||||||
"author_email": "author@example.com",
|
|
||||||
"maintainer": "Test Maintainer",
|
|
||||||
"maintainer_email": "maintainer@example.com",
|
|
||||||
"home_page": "https://example.com",
|
|
||||||
"project_urls": {
|
|
||||||
"Documentation": "https://docs.example.com",
|
|
||||||
"Repository": "https://github.com/test/test-package",
|
|
||||||
"Bug Reports": "https://github.com/test/test-package/issues",
|
|
||||||
"Support": "https://support.example.com",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_github_info(self):
|
|
||||||
"""Mock GitHub maintainer information."""
|
|
||||||
return {
|
|
||||||
"repository": "https://github.com/test/test-package",
|
|
||||||
"owner": "test",
|
|
||||||
"repository_data": {
|
|
||||||
"owner": {
|
|
||||||
"login": "test",
|
|
||||||
"type": "User",
|
|
||||||
"html_url": "https://github.com/test",
|
|
||||||
},
|
|
||||||
"has_pages": True,
|
|
||||||
"default_branch": "main",
|
|
||||||
},
|
|
||||||
"contributors": [
|
|
||||||
{
|
|
||||||
"login": "test",
|
|
||||||
"contributions": 150,
|
|
||||||
"html_url": "https://github.com/test",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"login": "contributor1",
|
|
||||||
"contributions": 25,
|
|
||||||
"html_url": "https://github.com/contributor1",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"primary_maintainer": {
|
|
||||||
"login": "test",
|
|
||||||
"type": "User",
|
|
||||||
"html_url": "https://github.com/test",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_support_channels(self):
|
|
||||||
"""Mock support channels information."""
|
|
||||||
return {
|
|
||||||
"issue_tracker": "https://github.com/test/test-package/issues",
|
|
||||||
"documentation": "https://test.github.io/test-package/",
|
|
||||||
"community_forum": None,
|
|
||||||
"chat_channels": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_community_channels(self):
|
|
||||||
"""Mock community channels information."""
|
|
||||||
return {
|
|
||||||
"github_discussions": "https://github.com/test/test-package/discussions",
|
|
||||||
"stackoverflow_tag": "https://stackoverflow.com/questions/tagged/test-package",
|
|
||||||
"reddit_community": None,
|
|
||||||
"discord_server": None,
|
|
||||||
}
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_contribution_info(self):
|
|
||||||
"""Mock contribution guidelines information."""
|
|
||||||
return {
|
|
||||||
"repository": "https://github.com/test/test-package",
|
|
||||||
"contribution_files": {
|
|
||||||
"CONTRIBUTING.md": True,
|
|
||||||
"CODE_OF_CONDUCT.md": True,
|
|
||||||
"SECURITY.md": False,
|
|
||||||
},
|
|
||||||
"guidelines_available": True,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_success(
|
|
||||||
self,
|
|
||||||
mock_package_metadata,
|
|
||||||
mock_github_info,
|
|
||||||
mock_support_channels,
|
|
||||||
mock_community_channels,
|
|
||||||
mock_contribution_info
|
|
||||||
):
|
|
||||||
"""Test successful retrieval of maintainer contacts."""
|
|
||||||
contact_types = ["github", "support", "community"]
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._analyze_github_maintainer_info") as mock_github, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_support_channels") as mock_support, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_community_channels") as mock_community, \
|
|
||||||
patch("pypi_query_mcp.tools.community._get_contribution_guidelines") as mock_contrib:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_metadata
|
|
||||||
mock_github.return_value = mock_github_info
|
|
||||||
mock_support.return_value = mock_support_channels
|
|
||||||
mock_community.return_value = mock_community_channels
|
|
||||||
mock_contrib.return_value = mock_contribution_info
|
|
||||||
|
|
||||||
result = await get_pypi_maintainer_contacts(
|
|
||||||
package_name="test-package",
|
|
||||||
contact_types=contact_types,
|
|
||||||
include_social_profiles=True,
|
|
||||||
include_contribution_guidelines=True,
|
|
||||||
respect_privacy_settings=True
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert "contact_information" in result
|
|
||||||
assert "accessibility_assessment" in result
|
|
||||||
assert "contact_recommendations" in result
|
|
||||||
assert "privacy_compliance" in result
|
|
||||||
assert "github_information" in result
|
|
||||||
assert "support_channels" in result
|
|
||||||
assert "community_channels" in result
|
|
||||||
assert "contribution_guidelines" in result
|
|
||||||
assert "social_profiles" in result
|
|
||||||
assert "communication_guidelines" in result
|
|
||||||
|
|
||||||
# Check privacy compliance
|
|
||||||
privacy = result["privacy_compliance"]
|
|
||||||
assert privacy["respects_privacy_settings"] is True
|
|
||||||
assert privacy["data_sources"] == "Publicly available information only"
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_email_included(self, mock_package_metadata):
|
|
||||||
"""Test contacts with email included and privacy disabled."""
|
|
||||||
contact_types = ["email", "github"]
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata, \
|
|
||||||
patch("pypi_query_mcp.tools.community._analyze_github_maintainer_info") as mock_github:
|
|
||||||
|
|
||||||
mock_metadata.return_value = mock_package_metadata
|
|
||||||
mock_github.return_value = {"status": "no_github_repository"}
|
|
||||||
|
|
||||||
result = await get_pypi_maintainer_contacts(
|
|
||||||
package_name="test-package",
|
|
||||||
contact_types=contact_types,
|
|
||||||
respect_privacy_settings=False
|
|
||||||
)
|
|
||||||
|
|
||||||
contact_info = result["contact_information"]
|
|
||||||
assert "available_contacts" in contact_info
|
|
||||||
# When privacy is disabled, emails should be included
|
|
||||||
if not contact_info["privacy_compliant"]:
|
|
||||||
# This would include emails if privacy is disabled
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_privacy_enabled(self, mock_package_metadata):
|
|
||||||
"""Test contacts with privacy settings enabled."""
|
|
||||||
contact_types = ["email", "github"]
|
|
||||||
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata:
|
|
||||||
mock_metadata.return_value = mock_package_metadata
|
|
||||||
|
|
||||||
result = await get_pypi_maintainer_contacts(
|
|
||||||
package_name="test-package",
|
|
||||||
contact_types=contact_types,
|
|
||||||
respect_privacy_settings=True
|
|
||||||
)
|
|
||||||
|
|
||||||
contact_info = result["contact_information"]
|
|
||||||
assert contact_info["privacy_compliant"] is True
|
|
||||||
# With privacy enabled, emails should be hidden
|
|
||||||
if "email_note" in contact_info.get("available_contacts", {}):
|
|
||||||
assert "hidden due to privacy settings" in contact_info["available_contacts"]["email_note"]
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_minimal_options(self, mock_package_metadata):
|
|
||||||
"""Test contacts with minimal options."""
|
|
||||||
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata:
|
|
||||||
mock_metadata.return_value = mock_package_metadata
|
|
||||||
|
|
||||||
result = await get_pypi_maintainer_contacts(
|
|
||||||
package_name="test-package",
|
|
||||||
contact_types=["support"],
|
|
||||||
include_social_profiles=False,
|
|
||||||
include_contribution_guidelines=False
|
|
||||||
)
|
|
||||||
|
|
||||||
assert result["package"] == "test-package"
|
|
||||||
assert "contact_information" in result
|
|
||||||
assert "github_information" not in result
|
|
||||||
assert "contribution_guidelines" not in result
|
|
||||||
assert "social_profiles" not in result
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_invalid_contact_types(self):
|
|
||||||
"""Test handling of invalid contact types."""
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await get_pypi_maintainer_contacts(
|
|
||||||
package_name="test-package",
|
|
||||||
contact_types=["invalid_type"]
|
|
||||||
)
|
|
||||||
|
|
||||||
async def test_get_maintainer_contacts_invalid_package_name(self):
|
|
||||||
"""Test handling of invalid package name."""
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await get_pypi_maintainer_contacts("")
|
|
||||||
|
|
||||||
with pytest.raises(InvalidPackageNameError):
|
|
||||||
await get_pypi_maintainer_contacts(" ")
|
|
||||||
|
|
||||||
|
|
||||||
class TestHelperFunctions:
|
|
||||||
"""Test helper functions for community tools."""
|
|
||||||
|
|
||||||
def test_parse_github_url_valid(self):
|
|
||||||
"""Test parsing valid GitHub URLs."""
|
|
||||||
test_cases = [
|
|
||||||
("https://github.com/owner/repo", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
|
|
||||||
("https://github.com/owner/repo.git", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
|
|
||||||
("https://github.com/owner/repo/", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
|
|
||||||
]
|
|
||||||
|
|
||||||
for url, expected in test_cases:
|
|
||||||
result = _parse_github_url(url)
|
|
||||||
assert result == expected
|
|
||||||
|
|
||||||
def test_parse_github_url_invalid(self):
|
|
||||||
"""Test parsing invalid GitHub URLs."""
|
|
||||||
test_cases = [
|
|
||||||
"https://gitlab.com/owner/repo",
|
|
||||||
"https://github.com/owner",
|
|
||||||
"https://github.com/",
|
|
||||||
"not-a-url",
|
|
||||||
]
|
|
||||||
|
|
||||||
for url in test_cases:
|
|
||||||
result = _parse_github_url(url)
|
|
||||||
assert "status" in result or "error" in result
|
|
||||||
|
|
||||||
def test_analyze_issue_sentiment_positive(self):
|
|
||||||
"""Test analyzing positive GitHub issue sentiment."""
|
|
||||||
issues_data = {
|
|
||||||
"issues": [
|
|
||||||
{
|
|
||||||
"title": "Enhancement: Add new feature",
|
|
||||||
"state": "closed",
|
|
||||||
"labels": [{"name": "enhancement"}, {"name": "good first issue"}],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "How to use this package?",
|
|
||||||
"state": "closed",
|
|
||||||
"labels": [{"name": "question"}],
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
result = _analyze_issue_sentiment(issues_data)
|
|
||||||
|
|
||||||
assert result["overall_sentiment_score"] > 50
|
|
||||||
assert result["issues_analyzed"] == 2
|
|
||||||
assert result["sentiment_factors"]["closed_issues"] == 2
|
|
||||||
assert result["sentiment_factors"]["enhancement_requests"] == 1
|
|
||||||
|
|
||||||
def test_analyze_issue_sentiment_negative(self):
|
|
||||||
"""Test analyzing negative GitHub issue sentiment."""
|
|
||||||
issues_data = {
|
|
||||||
"issues": [
|
|
||||||
{
|
|
||||||
"title": "Critical bug: Application crashes",
|
|
||||||
"state": "open",
|
|
||||||
"labels": [{"name": "bug"}, {"name": "critical"}],
|
|
||||||
                },
                {
                    "title": "Error when importing package",
                    "state": "open",
                    "labels": [{"name": "bug"}],
                },
            ]
        }

        result = _analyze_issue_sentiment(issues_data)

        assert result["overall_sentiment_score"] < 50
        assert result["issues_analyzed"] == 2
        assert result["sentiment_factors"]["open_issues"] == 2
        assert result["sentiment_factors"]["bug_reports"] == 2

    def test_analyze_stackoverflow_sentiment_positive(self):
        """Test analyzing positive Stack Overflow sentiment."""
        questions = [
            {
                "title": "How to implement best practices with test-package",
                "tags": ["test-package", "python"],
                "score": 5,
                "is_answered": True,
            },
            {
                "title": "Tutorial: Getting started with test-package",
                "tags": ["test-package", "tutorial"],
                "score": 3,
                "is_answered": True,
            },
        ]

        result = _analyze_stackoverflow_sentiment(questions, "test-package")

        assert result["overall_sentiment_score"] > 50
        assert result["questions_analyzed"] == 2
        assert result["question_characteristics"]["answered_questions"] == 2
        assert result["question_characteristics"]["average_score"] == 4.0

    def test_analyze_stackoverflow_sentiment_negative(self):
        """Test analyzing negative Stack Overflow sentiment."""
        questions = [
            {
                "title": "test-package not working: Error on import",
                "tags": ["test-package", "error"],
                "score": -1,
                "is_answered": False,
            },
            {
                "title": "Problem with test-package installation",
                "tags": ["test-package", "installation"],
                "score": 0,
                "is_answered": False,
            },
        ]

        result = _analyze_stackoverflow_sentiment(questions, "test-package")

        assert result["overall_sentiment_score"] < 50
        assert result["questions_analyzed"] == 2
        assert result["question_characteristics"]["unanswered_questions"] == 2
        assert result["question_characteristics"]["average_score"] == -0.5

    def test_calculate_community_score_excellent(self):
        """Test calculating excellent community score."""
        github_sentiment = {
            "sentiment_analysis": {"overall_sentiment_score": 85}
        }
        stackoverflow_data = {
            "sentiment_analysis": {"overall_sentiment_score": 80}
        }
        quality_indicators = {
            "quality_indicator_score": 90
        }
        community_health = {
            "github_community_health": {"health_percentage": 95}
        }

        result = _calculate_community_score(
            github_sentiment,
            stackoverflow_data,
            quality_indicators,
            community_health,
        )

        assert result["overall_score"] >= 80
        assert result["community_status"] == "excellent"
        assert len(result["score_components"]) > 0

    def test_calculate_community_score_poor(self):
        """Test calculating poor community score."""
        github_sentiment = {
            "sentiment_analysis": {"overall_sentiment_score": 20}
        }
        stackoverflow_data = {
            "sentiment_analysis": {"overall_sentiment_score": 25}
        }
        quality_indicators = {
            "quality_indicator_score": 15
        }
        community_health = {}

        result = _calculate_community_score(
            github_sentiment,
            stackoverflow_data,
            quality_indicators,
            community_health,
        )

        assert result["overall_score"] < 35
        assert result["community_status"] == "poor"

    def test_generate_community_insights_strong_community(self):
        """Test generating insights for strong community."""
        github_sentiment = {
            "repository_stats": {"stargazers_count": 2000}
        }
        stackoverflow_data = {
            "questions_found": 25
        }
        community_score = {
            "overall_score": 85
        }
        package_metadata = {
            "name": "test-package"
        }

        result = _generate_community_insights(
            github_sentiment,
            stackoverflow_data,
            community_score,
            package_metadata,
        )

        assert "key_insights" in result
        assert "community_strengths" in result
        assert len(result["community_strengths"]) > 0
        # Should have positive insights for a high score
        insights_text = " ".join(result["key_insights"])
        assert "strong" in insights_text.lower() or "positive" in insights_text.lower()

    def test_extract_contact_info_from_metadata_with_privacy(self):
        """Test extracting contact info with privacy enabled."""
        package_metadata = {
            "author_email": "author@example.com",
            "maintainer_email": "maintainer@example.com",
            "project_urls": {
                "Repository": "https://github.com/test/repo",
                "Documentation": "https://docs.example.com",
                "Support": "https://support.example.com",
            },
            "home_page": "https://example.com",
        }

        contact_types = ["email", "github", "support"]

        result = _extract_contact_info_from_metadata(
            package_metadata,
            contact_types,
            respect_privacy=True,
        )

        assert result["privacy_compliant"] is True
        # With privacy enabled, emails should be hidden
        assert "email_note" in result["available_contacts"]
        # Project URLs should still be included
        assert len(result["project_urls"]) > 0

    def test_extract_contact_info_from_metadata_without_privacy(self):
        """Test extracting contact info with privacy disabled."""
        package_metadata = {
            "author_email": "author@example.com",
            "maintainer_email": "maintainer@example.com",
            "project_urls": {
                "Repository": "https://github.com/test/repo",
            },
        }

        contact_types = ["email", "github"]

        result = _extract_contact_info_from_metadata(
            package_metadata,
            contact_types,
            respect_privacy=False,
        )

        assert result["privacy_compliant"] is False
        # With privacy disabled, emails should be included
        assert "author_email" in result["available_contacts"]
        assert "maintainer_email" in result["available_contacts"]


@pytest.mark.asyncio
class TestCommunityIntegrations:
    """Test community tool integrations with external services."""

    async def test_github_community_sentiment_no_repository(self):
        """Test GitHub sentiment analysis when no repository is found."""
        with patch("pypi_query_mcp.tools.community._find_github_repository") as mock_find:
            mock_find.return_value = {"status": "no_github_repository"}

            result = await _analyze_github_community_sentiment("test-package")

            assert result["status"] == "no_github_repository"

    async def test_stackoverflow_mentions_api_error(self):
        """Test Stack Overflow mentions with API error."""
        with patch("httpx.AsyncClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get.return_value.status_code = 500

            result = await _check_stackoverflow_mentions("test-package")

            assert result["status"] == "api_unavailable"
            assert result["questions_found"] == 0

    async def test_quality_indicator_with_download_stats(self):
        """Test quality indicator calculation with download stats."""
        with patch("pypi_query_mcp.tools.community.get_package_download_stats") as mock_stats:
            mock_stats.return_value = {
                "downloads": {
                    "last_month": 500000,
                    "last_week": 125000,
                    "last_day": 18000,
                }
            }

            result = await _analyze_pypi_downloads_as_quality_indicator("test-package")

            assert result["adoption_level"] == "high"
            assert result["quality_indicator_score"] > 0
            assert "download_stats" in result

    async def test_community_health_metrics_no_repository(self):
        """Test community health metrics when no repository exists."""
        with patch("pypi_query_mcp.tools.community._find_github_repository") as mock_find:
            mock_find.return_value = {"status": "no_github_repository"}

            result = await _get_community_health_metrics("test-package")

            assert result["has_repository"] is False
            assert "note" in result


@pytest.mark.asyncio
class TestAsyncBehavior:
    """Test async behavior and error handling."""

    async def test_concurrent_operations_success(self):
        """Test that concurrent operations work correctly."""
        with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_meta, \
             patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment") as mock_github, \
             patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_so, \
             patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
             patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:

            # Set up mocks to return after small delays to test concurrency
            import asyncio

            async def delayed_return(value, delay=0.01):
                await asyncio.sleep(delay)
                return value

            mock_meta.return_value = delayed_return({"name": "test-package"})
            mock_github.return_value = delayed_return({"sentiment_analysis": {"overall_sentiment_score": 75}})
            mock_so.return_value = delayed_return({"sentiment_analysis": {"overall_sentiment_score": 70}})
            mock_quality.return_value = delayed_return({"quality_indicator_score": 80})
            mock_health.return_value = delayed_return({"has_repository": True})

            start_time = datetime.now()
            result = await get_pypi_package_reviews("test-package")
            end_time = datetime.now()

            # Should complete relatively quickly due to concurrent execution
            assert (end_time - start_time).total_seconds() < 1.0
            assert result["package"] == "test-package"

    async def test_partial_failure_handling(self):
        """Test handling when some operations fail but others succeed."""
        with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_meta, \
             patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment", side_effect=Exception("GitHub error")), \
             patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_so, \
             patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
             patch("pypi_query_mcp.tools.community._get_community_health_metrics", side_effect=Exception("Health error")):

            mock_meta.return_value = {"name": "test-package"}
            mock_so.return_value = {"sentiment_analysis": {"overall_sentiment_score": 70}}
            mock_quality.return_value = {"quality_indicator_score": 80}

            result = await get_pypi_package_reviews("test-package")

            # Should still return a result even with some failures
            assert result["package"] == "test-package"
            assert "community_score" in result
            # Failed operations should result in empty dicts or be excluded
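
# --- Illustrative sketch (not part of the original test suite) --------------
# The two tests above assume get_pypi_package_reviews() runs its sub-analyses
# concurrently and degrades gracefully when one of them fails. A minimal,
# self-contained sketch of that pattern (the helper below is hypothetical;
# only the asyncio.gather(return_exceptions=True) idiom is the point):

import asyncio


async def _gather_sections_sketch(package_name: str) -> dict:
    async def ok_section():
        return {"sentiment_analysis": {"overall_sentiment_score": 75}}

    async def failing_section():
        raise RuntimeError("simulated upstream failure")

    results = await asyncio.gather(
        ok_section(), failing_section(), return_exceptions=True
    )
    # Exceptions come back as values rather than raising; swap them for empty
    # dicts so downstream scoring always sees a consistent shape.
    sections = [r if not isinstance(r, Exception) else {} for r in results]
    return {"package": package_name, "sections": sections}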
@ -1,730 +0,0 @@
"""Tests for PyPI Discovery & Monitoring Tools."""

import pytest
from unittest.mock import AsyncMock, patch

from pypi_query_mcp.core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from pypi_query_mcp.tools.discovery import (
    DiscoveryCache,
    get_pypi_package_recommendations,
    get_pypi_trending_today,
    monitor_pypi_new_releases,
    search_pypi_by_maintainer,
    _categorize_package,
    _is_package_maintainer,
    _discovery_cache,
)


class TestDiscoveryCache:
    """Test the DiscoveryCache functionality."""

    def test_cache_basic_operations(self):
        """Test basic cache get/set operations."""
        cache = DiscoveryCache(default_ttl=60)

        # Test empty cache
        assert cache.get("nonexistent") is None

        # Test set and get
        test_data = {"test": "value"}
        cache.set("test_key", test_data)
        assert cache.get("test_key") == test_data

        # Test clear
        cache.clear()
        assert cache.get("test_key") is None

    def test_cache_expiration(self):
        """Test cache expiration functionality."""
        cache = DiscoveryCache(default_ttl=1)  # 1 second TTL

        test_data = {"test": "value"}
        cache.set("test_key", test_data)

        # Should be available immediately
        assert cache.get("test_key") == test_data

        # Mock time to simulate expiration
        with patch("time.time", return_value=1000000):
            cache.set("test_key", test_data)

        with patch("time.time", return_value=1000002):  # 2 seconds later
            assert cache.get("test_key") is None

    def test_cache_custom_ttl(self):
        """Test cache with custom TTL."""
        cache = DiscoveryCache(default_ttl=60)

        test_data = {"test": "value"}
        cache.set("test_key", test_data, ttl=120)  # Custom 2-minute TTL

        # Should still be available after the default TTL would have expired
        with patch("time.time", return_value=1000000):
            cache.set("test_key", test_data, ttl=120)

        with patch("time.time", return_value=1000060):  # 1 minute later
            assert cache.get("test_key") == test_data

        with patch("time.time", return_value=1000130):  # 2+ minutes later
            assert cache.get("test_key") is None
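
# --- Illustrative sketch (not part of the original test suite) --------------
# A minimal in-memory TTL cache satisfying the interface exercised above:
# get()/set()/clear(), a default_ttl constructor argument, a per-entry ttl
# override, and absolute expiry computed from time.time() (which is why the
# tests can expire entries by patching "time.time"). The real DiscoveryCache
# may differ; this only documents the assumed contract.

import time


class _TTLCacheSketch:
    def __init__(self, default_ttl: int = 60):
        self.default_ttl = default_ttl
        self._store = {}  # key -> (expires_at, value)

    def get(self, key):
        entry = self._store.get(key)
        if entry is None:
            return None
        expires_at, value = entry
        if time.time() >= expires_at:
            del self._store[key]  # lazily evict expired entries
            return None
        return value

    def set(self, key, value, ttl=None):
        self._store[key] = (time.time() + (ttl or self.default_ttl), value)

    def clear(self):
        self._store.clear()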


class TestMonitorPyPINewReleases:
    """Test the monitor_pypi_new_releases function."""

    @pytest.mark.asyncio
    async def test_monitor_basic_functionality(self):
        """Test basic monitoring functionality."""
        mock_releases = [
            {
                "name": "test-package",
                "version": "1.0.0",
                "release_time": "2023-01-01T12:00:00Z",
                "description": "Test package",
                "link": "https://pypi.org/project/test-package/",
            }
        ]

        mock_package_info = {
            "info": {
                "name": "test-package",
                "version": "1.0.0",
                "summary": "A test package",
                "author": "Test Author",
                "license": "MIT",
                "home_page": "https://example.com",
                "keywords": "test, package",
                "requires_python": ">=3.8",
                "project_urls": {},
                "classifiers": ["Topic :: Software Development"],
            }
        }

        with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
            mock_fetch.return_value = mock_releases

            with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
                mock_client = AsyncMock()
                mock_client_class.return_value.__aenter__.return_value = mock_client
                mock_client.get_package_info.return_value = mock_package_info

                with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                    mock_categorize.return_value = ["software-development"]

                    result = await monitor_pypi_new_releases(hours=24)

                    assert "new_releases" in result
                    assert result["total_releases_found"] == 1
                    assert result["monitoring_period_hours"] == 24
                    assert len(result["new_releases"]) == 1

                    release = result["new_releases"][0]
                    assert release["name"] == "test-package"
                    assert release["summary"] == "A test package"
                    assert "categories" in release

    @pytest.mark.asyncio
    async def test_monitor_with_filters(self):
        """Test monitoring with various filters."""
        mock_releases = [
            {
                "name": "web-package",
                "version": "1.0.0",
                "release_time": "2023-01-01T12:00:00Z",
                "description": "Web framework",
                "link": "https://pypi.org/project/web-package/",
            },
            {
                "name": "data-package",
                "version": "2.0.0",
                "release_time": "2023-01-01T13:00:00Z",
                "description": "Data science package",
                "link": "https://pypi.org/project/data-package/",
            },
        ]

        with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
            mock_fetch.return_value = mock_releases

            with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
                mock_client = AsyncMock()
                mock_client_class.return_value.__aenter__.return_value = mock_client

                def mock_get_package_info(package_name):
                    if package_name == "web-package":
                        return {
                            "info": {
                                "name": "web-package",
                                "author": "Web Author",
                                "summary": "Web framework",
                                "license": "MIT",
                            }
                        }
                    elif package_name == "data-package":
                        return {
                            "info": {
                                "name": "data-package",
                                "author": "Data Author",
                                "summary": "Data science package",
                                "license": "Apache",
                            }
                        }

                mock_client.get_package_info.side_effect = mock_get_package_info

                with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                    def mock_categorize_func(info):
                        if "web" in info.get("summary", "").lower():
                            return ["web"]
                        elif "data" in info.get("summary", "").lower():
                            return ["data-science"]
                        return ["general"]

                    mock_categorize.side_effect = mock_categorize_func

                    # Test category filtering
                    result = await monitor_pypi_new_releases(
                        categories=["web"],
                        hours=24,
                    )

                    assert result["total_releases_found"] == 1
                    assert result["new_releases"][0]["name"] == "web-package"

                    # Test maintainer filtering
                    result = await monitor_pypi_new_releases(
                        maintainer_filter="Web Author",
                        hours=24,
                    )

                    assert result["total_releases_found"] == 1
                    assert result["new_releases"][0]["name"] == "web-package"

    @pytest.mark.asyncio
    async def test_monitor_cache_functionality(self):
        """Test cache functionality in monitoring."""
        # Clear cache first
        _discovery_cache.clear()

        mock_releases = [
            {
                "name": "cached-package",
                "version": "1.0.0",
                "release_time": "2023-01-01T12:00:00Z",
                "description": "Cached package",
                "link": "https://pypi.org/project/cached-package/",
            }
        ]

        with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
            mock_fetch.return_value = mock_releases

            with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
                mock_client = AsyncMock()
                mock_client_class.return_value.__aenter__.return_value = mock_client
                mock_client.get_package_info.return_value = {
                    "info": {
                        "name": "cached-package",
                        "summary": "Cached package",
                        "author": "Cache Author",
                    }
                }

                with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                    mock_categorize.return_value = ["general"]

                    # First call should fetch data
                    result1 = await monitor_pypi_new_releases(hours=24, cache_ttl=300)
                    assert mock_fetch.call_count == 1

                    # Second call with the same parameters should use the cache
                    result2 = await monitor_pypi_new_releases(hours=24, cache_ttl=300)
                    assert mock_fetch.call_count == 1  # Should not increase

                    # Results should be identical
                    assert result1["timestamp"] == result2["timestamp"]

    @pytest.mark.asyncio
    async def test_monitor_error_handling(self):
        """Test error handling in monitoring."""
        with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
            mock_fetch.side_effect = Exception("Network error")

            with pytest.raises(NetworkError):
                await monitor_pypi_new_releases(hours=24)


class TestGetPyPITrendingToday:
    """Test the get_pypi_trending_today function."""

    @pytest.mark.asyncio
    async def test_trending_basic_functionality(self):
        """Test basic trending analysis."""
        mock_releases_result = {
            "new_releases": [
                {
                    "name": "trending-package",
                    "version": "1.0.0",
                    "summary": "Trending package",
                    "categories": ["web"],
                    "release_time": "2023-01-01T12:00:00Z",
                }
            ]
        }

        mock_trending_result = {
            "trending_packages": [
                {
                    "package": "popular-package",
                    "downloads": {"last_day": 10000},
                    "summary": "Popular package",
                }
            ]
        }

        with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
            mock_monitor.return_value = mock_releases_result

            with patch("pypi_query_mcp.tools.search.get_trending_packages") as mock_trending:
                mock_trending.return_value = mock_trending_result

                with patch("pypi_query_mcp.tools.discovery._enhance_trending_analysis") as mock_enhance:
                    mock_enhance.return_value = [
                        {
                            "name": "trending-package",
                            "trending_score": 10.0,
                            "trending_reason": "new_release",
                        },
                        {
                            "name": "popular-package",
                            "trending_score": 8.0,
                            "trending_reason": "download_surge",
                        },
                    ]

                    result = await get_pypi_trending_today(
                        category="web",
                        limit=10,
                    )

                    assert "trending_today" in result
                    assert result["total_trending"] == 2
                    assert result["category"] == "web"
                    assert len(result["trending_today"]) == 2

    @pytest.mark.asyncio
    async def test_trending_with_filters(self):
        """Test trending analysis with filters."""
        with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
            mock_monitor.return_value = {"new_releases": []}

            with patch("pypi_query_mcp.tools.search.get_trending_packages") as mock_trending:
                mock_trending.return_value = {"trending_packages": []}

                with patch("pypi_query_mcp.tools.discovery._enhance_trending_analysis") as mock_enhance:
                    mock_enhance.return_value = []

                    result = await get_pypi_trending_today(
                        category="ai",
                        min_downloads=5000,
                        limit=20,
                        include_new_packages=False,
                        trending_threshold=2.0,
                    )

                    assert result["category"] == "ai"
                    assert result["filters_applied"]["min_downloads"] == 5000
                    assert result["filters_applied"]["trending_threshold"] == 2.0
                    assert not result["filters_applied"]["include_new_packages"]

    @pytest.mark.asyncio
    async def test_trending_error_handling(self):
        """Test error handling in trending analysis."""
        with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
            mock_monitor.side_effect = Exception("Monitoring error")

            with pytest.raises(SearchError):
                await get_pypi_trending_today()


class TestSearchPyPIByMaintainer:
    """Test the search_pypi_by_maintainer function."""

    @pytest.mark.asyncio
    async def test_search_by_maintainer_basic(self):
        """Test basic maintainer search functionality."""
        mock_search_results = {
            "packages": [
                {
                    "name": "maintainer-package-1",
                    "summary": "First package",
                },
                {
                    "name": "maintainer-package-2",
                    "summary": "Second package",
                },
            ]
        }

        mock_package_info = {
            "info": {
                "name": "maintainer-package-1",
                "version": "1.0.0",
                "summary": "First package",
                "author": "Test Maintainer",
                "author_email": "test@example.com",
                "license": "MIT",
                "keywords": "test",
                "classifiers": [],
                "requires_python": ">=3.8",
            }
        }

        with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
            mock_search.return_value = mock_search_results

            with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
                mock_client = AsyncMock()
                mock_client_class.return_value.__aenter__.return_value = mock_client
                mock_client.get_package_info.return_value = mock_package_info

                with patch("pypi_query_mcp.tools.discovery._is_package_maintainer") as mock_is_maintainer:
                    mock_is_maintainer.return_value = True

                    with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                        mock_categorize.return_value = ["development"]

                        result = await search_pypi_by_maintainer(
                            maintainer="Test Maintainer",
                            sort_by="popularity",
                        )

                        assert result["maintainer"] == "Test Maintainer"
                        assert result["total_packages"] == 1
                        assert len(result["packages"]) == 1
                        assert "portfolio_analysis" in result
                        assert "maintainer_profile" in result

    @pytest.mark.asyncio
    async def test_search_by_maintainer_invalid_input(self):
        """Test maintainer search with invalid input."""
        with pytest.raises(InvalidPackageNameError):
            await search_pypi_by_maintainer("")

        with pytest.raises(InvalidPackageNameError):
            await search_pypi_by_maintainer(" ")

    @pytest.mark.asyncio
    async def test_search_by_maintainer_with_stats(self):
        """Test maintainer search with download statistics."""
        mock_search_results = {"packages": [{"name": "stats-package"}]}
        mock_package_info = {
            "info": {
                "name": "stats-package",
                "version": "1.0.0",
                "author": "Stats Maintainer",
                "summary": "Package with stats",
            }
        }
        mock_stats = {
            "recent_downloads": {
                "last_month": 50000,
                "last_week": 12000,
                "last_day": 2000,
            }
        }

        with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
            mock_search.return_value = mock_search_results

            with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
                mock_client = AsyncMock()
                mock_client_class.return_value.__aenter__.return_value = mock_client
                mock_client.get_package_info.return_value = mock_package_info

                with patch("pypi_query_mcp.tools.discovery._is_package_maintainer") as mock_is_maintainer:
                    mock_is_maintainer.return_value = True

                    with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                        mock_categorize.return_value = ["general"]

                        with patch("pypi_query_mcp.tools.download_stats.get_package_download_stats") as mock_get_stats:
                            mock_get_stats.return_value = mock_stats

                            result = await search_pypi_by_maintainer(
                                maintainer="Stats Maintainer",
                                include_stats=True,
                            )

                            assert result["total_packages"] == 1
                            package = result["packages"][0]
                            assert "download_stats" in package
                            assert package["download_stats"]["last_month"] == 50000

    @pytest.mark.asyncio
    async def test_search_by_maintainer_error_handling(self):
        """Test error handling in maintainer search."""
        with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
            mock_search.side_effect = Exception("Search error")

            with pytest.raises(SearchError):
                await search_pypi_by_maintainer("Error Maintainer")


class TestGetPyPIPackageRecommendations:
    """Test the get_pypi_package_recommendations function."""

    @pytest.mark.asyncio
    async def test_recommendations_basic_functionality(self):
        """Test basic recommendation functionality."""
        mock_package_info = {
            "info": {
                "name": "base-package",
                "version": "1.0.0",
                "summary": "Base package for recommendations",
                "keywords": "test, recommendations",
                "classifiers": ["Topic :: Software Development"],
            }
        }

        with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
            mock_client = AsyncMock()
            mock_client_class.return_value.__aenter__.return_value = mock_client
            mock_client.get_package_info.return_value = mock_package_info

            with patch("pypi_query_mcp.tools.discovery._find_similar_packages") as mock_similar:
                mock_similar.return_value = [
                    {
                        "name": "similar-package",
                        "type": "similar",
                        "confidence": 0.8,
                        "reason": "Similar functionality",
                    }
                ]

                with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
                    mock_enhance.return_value = [
                        {
                            "name": "similar-package",
                            "type": "similar",
                            "confidence": 0.8,
                            "summary": "Similar package",
                            "categories": ["development"],
                        }
                    ]

                    with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                        mock_categorize.return_value = ["development"]

                        result = await get_pypi_package_recommendations(
                            package_name="base-package",
                            recommendation_type="similar",
                        )

                        assert result["base_package"]["name"] == "base-package"
                        assert result["total_recommendations"] == 1
                        assert result["recommendation_type"] == "similar"
                        assert len(result["recommendations"]) == 1

    @pytest.mark.asyncio
    async def test_recommendations_different_types(self):
        """Test different recommendation types."""
        mock_package_info = {
            "info": {
                "name": "test-package",
                "version": "1.0.0",
                "summary": "Test package",
            }
        }

        with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
            mock_client = AsyncMock()
            mock_client_class.return_value.__aenter__.return_value = mock_client
            mock_client.get_package_info.return_value = mock_package_info

            with patch("pypi_query_mcp.tools.discovery._find_complementary_packages") as mock_complementary:
                mock_complementary.return_value = [
                    {
                        "name": "complementary-package",
                        "type": "complementary",
                        "confidence": 0.9,
                    }
                ]

                with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
                    mock_enhance.return_value = [
                        {
                            "name": "complementary-package",
                            "type": "complementary",
                            "confidence": 0.9,
                        }
                    ]

                    with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                        mock_categorize.return_value = ["general"]

                        result = await get_pypi_package_recommendations(
                            package_name="test-package",
                            recommendation_type="complementary",
                        )

                        assert result["recommendation_type"] == "complementary"
                        assert result["total_recommendations"] == 1

    @pytest.mark.asyncio
    async def test_recommendations_with_user_context(self):
        """Test recommendations with user context."""
        mock_package_info = {
            "info": {
                "name": "context-package",
                "version": "1.0.0",
                "summary": "Package with context",
            }
        }

        user_context = {
            "experience_level": "beginner",
            "use_case": "web development",
        }

        with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
            mock_client = AsyncMock()
            mock_client_class.return_value.__aenter__.return_value = mock_client
            mock_client.get_package_info.return_value = mock_package_info

            with patch("pypi_query_mcp.tools.discovery._find_similar_packages") as mock_similar:
                mock_similar.return_value = []

                with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
                    mock_enhance.return_value = []

                    with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
                        mock_categorize.return_value = ["web"]

                        result = await get_pypi_package_recommendations(
                            package_name="context-package",
                            user_context=user_context,
                        )

                        assert result["parameters"]["user_context"] == user_context
                        assert result["algorithm_insights"]["personalization_applied"] is True

    @pytest.mark.asyncio
    async def test_recommendations_invalid_input(self):
        """Test recommendations with invalid input."""
        with pytest.raises(InvalidPackageNameError):
            await get_pypi_package_recommendations("")

        with pytest.raises(InvalidPackageNameError):
            await get_pypi_package_recommendations(" ")

    @pytest.mark.asyncio
    async def test_recommendations_error_handling(self):
        """Test error handling in recommendations."""
        with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
            mock_client = AsyncMock()
            mock_client_class.return_value.__aenter__.return_value = mock_client
            mock_client.get_package_info.side_effect = Exception("Package error")

            with pytest.raises(SearchError):
                await get_pypi_package_recommendations("error-package")


class TestHelperFunctions:
    """Test helper functions used by discovery tools."""

    def test_categorize_package(self):
        """Test package categorization."""
        # Test with classifiers
        package_info = {
            "summary": "Web framework for Python",
            "description": "A micro web framework",
            "keywords": "web, framework, api",
            "classifiers": [
                "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
                "Topic :: Software Development :: Libraries :: Python Modules",
            ],
        }

        # Call the real helper directly; patching it here would be pointless
        # because the name imported at the top of this module is the function
        # under test.
        categories = _categorize_package(package_info)
        assert "web" in categories

    def test_is_package_maintainer(self):
        """Test maintainer checking functionality."""
        package_info = {
            "author": "John Doe",
            "author_email": "john@example.com",
            "maintainer": "Jane Smith",
            "maintainer_email": "jane@example.com",
        }

        # Test author match (case-insensitive)
        assert _is_package_maintainer(package_info, "John Doe", False) is True
        assert _is_package_maintainer(package_info, "john doe", False) is True

        # Test maintainer match
        assert _is_package_maintainer(package_info, "Jane Smith", False) is True

        # Test no match
        assert _is_package_maintainer(package_info, "Bob Wilson", False) is False

        # Test email match (only when email matching is enabled)
        assert _is_package_maintainer(package_info, "john@example.com", True) is True
        assert _is_package_maintainer(package_info, "john@example.com", False) is False
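
# --- Illustrative sketch (not part of the original test suite) --------------
# A minimal implementation satisfying the assertions above: case-insensitive
# matching against the author and maintainer names, with the email fields
# only consulted when the third argument enables email matching. The real
# helper may differ.

def _is_package_maintainer_sketch(package_info: dict, query: str, match_email: bool) -> bool:
    candidates = [package_info.get("author", ""), package_info.get("maintainer", "")]
    if match_email:
        candidates += [
            package_info.get("author_email", ""),
            package_info.get("maintainer_email", ""),
        ]
    query = query.strip().lower()
    return any(query == candidate.lower() for candidate in candidates if candidate)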


@pytest.fixture
def mock_rss_response():
    """Mock RSS response for testing."""
    return '''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
    <channel>
        <title>PyPI Recent Updates</title>
        <item>
            <title>test-package 1.0.0</title>
            <description>Test package description</description>
            <link>https://pypi.org/project/test-package/</link>
            <pubDate>Mon, 01 Jan 2023 12:00:00 GMT</pubDate>
        </item>
    </channel>
</rss>'''
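
# --- Illustrative sketch (not part of the original test suite) --------------
# Nothing in this file consumes the mock_rss_response fixture above. A test
# that used it could look like the sketch below; the transport and the exact
# signature of _fetch_recent_releases_from_rss() are assumptions, so the
# sketch is left commented out:
#
#     @pytest.mark.asyncio
#     async def test_fetch_recent_releases_parses_rss(mock_rss_response):
#         with patch("httpx.AsyncClient") as mock_client:
#             mock_get = AsyncMock()
#             mock_get.return_value.status_code = 200
#             mock_get.return_value.text = mock_rss_response
#             mock_client.return_value.__aenter__.return_value.get = mock_get
#
#             releases = await _fetch_recent_releases_from_rss()
#
#             assert releases[0]["name"] == "test-package"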


class TestIntegration:
    """Integration tests for discovery tools."""

    @pytest.mark.asyncio
    async def test_full_workflow_monitoring_to_recommendations(self):
        """Test full workflow from monitoring to recommendations."""
        # This would be a more complex integration test
        # that combines multiple functions in a realistic workflow.
        # Skip explicitly rather than silently passing with an empty body.
        pytest.skip("Full workflow integration test not yet implemented")

    @pytest.mark.asyncio
    async def test_cache_consistency_across_functions(self):
        """Test cache consistency across different discovery functions."""
        # Clear cache first
        _discovery_cache.clear()

        # Test that the cache is properly shared between functions
        with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
            mock_fetch.return_value = []

            # First call should populate the cache
            await monitor_pypi_new_releases(hours=24, cache_ttl=300)
            assert mock_fetch.call_count == 1

            # Second call should use the cache
            await monitor_pypi_new_releases(hours=24, cache_ttl=300)
            assert mock_fetch.call_count == 1  # Should not increase

    def test_error_propagation(self):
        """Test that errors are properly propagated and handled."""
        # Test various error scenarios and ensure they're handled consistently.
        # Skip explicitly rather than silently passing with an empty body.
        pytest.skip("Error propagation scenarios not yet implemented")


# Additional test classes for edge cases and performance testing could be added here
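
# --- Illustrative sketch (not part of the original test suite) --------------
# The data handoff an end-to-end test would exercise: a name discovered by
# monitor_pypi_new_releases() feeds get_pypi_package_recommendations(). The
# payload shapes mirror the ones asserted earlier in this file; the wiring
# below is an assumption, not the actual implementation.

async def _workflow_sketch() -> dict:
    # Step 1: payload shape returned by the monitoring tool.
    monitored = {"new_releases": [{"name": "fresh-package", "categories": ["web"]}]}

    # Step 2: take the first discovered package name...
    first = monitored["new_releases"][0]["name"]

    # Step 3: ...and the recommendation payload a follow-up call might return.
    recommendations = {
        "base_package": {"name": first},
        "recommendations": [{"name": "similar-package", "confidence": 0.8}],
    }
    assert recommendations["base_package"]["name"] == "fresh-package"
    return recommendations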
@ -1,574 +0,0 @@
"""Tests for PyPI Development Workflow Tools."""

import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from pypi_query_mcp.core.exceptions import (
    InvalidPackageNameError,
    NetworkError,
    PackageNotFoundError,
)
from pypi_query_mcp.tools.workflow import (
    PyPIWorkflowError,
    _analyze_build_quality,
    _analyze_wheel_filename,
    _calculate_completeness_score,
    _calculate_discoverability_score,
    _generate_html_preview,
    _generate_next_steps,
    _validate_package_name_format,
    check_pypi_upload_requirements,
    get_pypi_build_logs,
    preview_pypi_package_page,
    validate_pypi_package_name,
)


class TestValidatePackageNameFormat:
    """Test package name format validation."""

    def test_valid_package_names(self):
        """Test that valid package names pass validation."""
        valid_names = [
            "mypackage",
            "my-package",
            "my_package",
            "my.package",
            "package123",
            "a",
            "package-name-123",
        ]

        for name in valid_names:
            result = _validate_package_name_format(name)
            assert result["valid"] is True, f"'{name}' should be valid"
            assert len(result["issues"]) == 0

    def test_invalid_package_names(self):
        """Test that invalid package names fail validation."""
        invalid_names = [
            "",  # Empty
            "-package",  # Starts with hyphen
            "package-",  # Ends with hyphen
            ".package",  # Starts with dot
            "package.",  # Ends with dot
            "pack--age",  # Double hyphen
            "pack..age",  # Double dot
            "pack@age",  # Invalid character
            "PACKAGE",  # Uppercase (should get recommendation)
        ]

        for name in invalid_names:
            result = _validate_package_name_format(name)
            if name == "PACKAGE":
                # This should be valid but get recommendations
                assert result["valid"] is True
                assert len(result["recommendations"]) > 0
            else:
                assert result["valid"] is False or len(result["issues"]) > 0, f"'{name}' should be invalid"

    def test_reserved_names(self):
        """Test that reserved names are flagged."""
        reserved_names = ["pip", "setuptools", "wheel", "python"]

        for name in reserved_names:
            result = _validate_package_name_format(name)
            assert result["valid"] is False
            assert any("reserved" in issue.lower() for issue in result["issues"])

    def test_normalization(self):
        """Test package name normalization."""
        test_cases = [
            ("My_Package", "my-package"),
            ("my__package", "my-package"),
            ("my.-_package", "my-package"),
            ("PACKAGE", "package"),
        ]

        for input_name, expected in test_cases:
            result = _validate_package_name_format(input_name)
            assert result["normalized_name"] == expected


class TestValidatePyPIPackageName:
    """Test the main package name validation function."""

    @pytest.mark.asyncio
    async def test_validate_available_package(self):
        """Test validation of an available package name."""
        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            # Mock package not found (name is available)
            mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = PackageNotFoundError("test-package")

            result = await validate_pypi_package_name("test-package")

            assert result["package_name"] == "test-package"
            assert result["availability"]["status"] == "available"
            assert result["ready_for_upload"] is True

    @pytest.mark.asyncio
    async def test_validate_taken_package(self):
        """Test validation of a taken package name."""
        mock_package_data = {
            "info": {
                "name": "requests",
                "version": "2.28.0",
                "summary": "Python HTTP for Humans.",
                "author": "Kenneth Reitz",
            }
        }

        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data

            result = await validate_pypi_package_name("requests")

            assert result["package_name"] == "requests"
            assert result["availability"]["status"] == "taken"
            assert result["availability"]["existing_package"]["name"] == "requests"
            assert result["ready_for_upload"] is False

    @pytest.mark.asyncio
    async def test_validate_invalid_format(self):
        """Test validation of an invalid package name format."""
        with pytest.raises(InvalidPackageNameError):
            await validate_pypi_package_name("-invalid-")

    @pytest.mark.asyncio
    async def test_network_error_handling(self):
        """Test handling of network errors during validation."""
        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = NetworkError("Connection failed")

            result = await validate_pypi_package_name("test-package")

            assert result["availability"]["status"] == "unknown"


class TestPreviewPyPIPackagePage:
    """Test package page preview generation."""

    @pytest.mark.asyncio
    async def test_basic_preview_generation(self):
        """Test basic preview generation with minimal metadata."""
        result = await preview_pypi_package_page(
            package_name="my-package",
            version="1.0.0",
            summary="A test package",
            author="Test Author",
        )

        assert result["package_name"] == "my-package"
        assert result["version"] == "1.0.0"
        assert result["preview"]["sections"]["header"]["summary"] == "A test package"
        assert result["preview"]["sections"]["header"]["author"] == "Test Author"

    @pytest.mark.asyncio
    async def test_comprehensive_preview(self):
        """Test preview generation with comprehensive metadata."""
        keywords = ["testing", "python", "package"]
        classifiers = [
            "Development Status :: 4 - Beta",
            "Programming Language :: Python :: 3.8",
            "License :: OSI Approved :: MIT License",
        ]

        result = await preview_pypi_package_page(
            package_name="comprehensive-package",
            version="2.1.0",
            summary="A comprehensive test package with full metadata",
            description="This is a detailed description of the package functionality...",
            author="Test Author",
            license_name="MIT",
            home_page="https://github.com/test/package",
            keywords=keywords,
            classifiers=classifiers,
        )

        assert result["ready_for_upload"] is True
        assert result["validation"]["completeness_score"]["level"] in ["good", "complete"]
        assert result["seo_analysis"]["discoverability_score"]["level"] in ["good", "excellent"]

    @pytest.mark.asyncio
    async def test_preview_warnings(self):
        """Test that preview generates appropriate warnings."""
        result = await preview_pypi_package_page(
            package_name="minimal-package",
            # Minimal metadata to trigger warnings
        )

        assert len(result["warnings"]) > 0
        assert any("Summary is missing" in warning for warning in result["warnings"])
        assert any("description" in warning.lower() for warning in result["warnings"])

    @pytest.mark.asyncio
    async def test_invalid_package_name_preview(self):
        """Test preview with an invalid package name."""
        with pytest.raises(InvalidPackageNameError):
            await preview_pypi_package_page("-invalid-package-")


class TestCalculateScores:
    """Test scoring calculation functions."""

    def test_discoverability_score_calculation(self):
        """Test discoverability score calculation."""
        # High quality metadata
        result = _calculate_discoverability_score(
            summary="A comprehensive package for testing",
            description="This is a very detailed description with lots of useful information about the package functionality and use cases.",
            keywords=["testing", "python", "package", "quality", "automation"],
            classifiers=["Development Status :: 4 - Beta", "Programming Language :: Python :: 3.8"],
        )

        assert result["score"] >= 70
        assert result["level"] in ["good", "excellent"]

        # Poor quality metadata
        result = _calculate_discoverability_score("", "", [], [])
        assert result["score"] == 0
        assert result["level"] == "poor"

    def test_completeness_score_calculation(self):
        """Test completeness score calculation."""
        # Complete metadata
        sections = {
            "header": {
                "summary": "A test package",
                "author": "Test Author",
            },
            "metadata": {
                "license": "MIT",
                "home_page": "https://github.com/test/package",
                "keywords": ["test", "package"],
                "classifiers": ["Development Status :: 4 - Beta"],
            },
            "description": {
                "content": "A detailed description that is long enough to earn a good completeness score.",
                "length": 80,
            },
        }

        result = _calculate_completeness_score(sections)
        assert result["score"] >= 60
        assert result["level"] in ["good", "complete"]


class TestCheckPyPIUploadRequirements:
    """Test PyPI upload requirements checking."""

    @pytest.mark.asyncio
    async def test_minimal_requirements_met(self):
        """Test with the minimal required fields."""
        result = await check_pypi_upload_requirements(
            package_name="test-package",
            version="1.0.0",
            author="Test Author",
            description="A test package",
        )

        assert result["upload_readiness"]["can_upload"] is True
        assert result["validation"]["compliance"]["required_percentage"] == 100.0

    @pytest.mark.asyncio
    async def test_missing_required_fields(self):
        """Test with missing required fields."""
        result = await check_pypi_upload_requirements(
            package_name="test-package",
            # Missing required fields
        )

        assert result["upload_readiness"]["can_upload"] is False
        assert len(result["issues"]["errors"]) > 0

    @pytest.mark.asyncio
    async def test_comprehensive_metadata(self):
        """Test with comprehensive metadata."""
        classifiers = [
            "Development Status :: 4 - Beta",
            "Programming Language :: Python :: 3.8",
            "License :: OSI Approved :: MIT License",
        ]

        result = await check_pypi_upload_requirements(
            package_name="comprehensive-package",
            version="1.0.0",
            author="Test Author",
            author_email="test@example.com",
            description="A comprehensive test package",
            long_description="This is a detailed description...",
            license_name="MIT",
            home_page="https://github.com/test/package",
            classifiers=classifiers,
            requires_python=">=3.8",
        )

        assert result["upload_readiness"]["should_upload"] is True
        assert result["validation"]["compliance"]["recommended_percentage"] >= 80.0

    @pytest.mark.asyncio
    async def test_invalid_package_name_requirements(self):
        """Test requirements check with an invalid package name."""
        with pytest.raises(InvalidPackageNameError):
            await check_pypi_upload_requirements("-invalid-")


class TestGetPyPIBuildLogs:
    """Test PyPI build logs analysis."""

    @pytest.mark.asyncio
    async def test_analyze_package_with_wheels(self):
        """Test analysis of a package with wheel distributions."""
        mock_package_data = {
            "info": {"name": "test-package", "version": "1.0.0"},
            "releases": {
                "1.0.0": [
                    {
                        "filename": "test_package-1.0.0-py3-none-any.whl",
                        "packagetype": "bdist_wheel",
                        "size": 10000,
                        "upload_time_iso_8601": "2023-01-01T00:00:00Z",
                        "python_version": "py3",
                        "url": "https://files.pythonhosted.org/...",
                        "md5_digest": "abc123",
                        "digests": {"sha256": "def456"},
                    },
                    {
                        "filename": "test-package-1.0.0.tar.gz",
                        "packagetype": "sdist",
                        "size": 15000,
                        "upload_time_iso_8601": "2023-01-01T00:00:00Z",
                        "python_version": "source",
                        "url": "https://files.pythonhosted.org/...",
                        "md5_digest": "ghi789",
                        "digests": {"sha256": "jkl012"},
                    },
                ]
            },
            "urls": [],  # Empty for this test
        }

        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data

            result = await get_pypi_build_logs("test-package")

            assert result["package_name"] == "test-package"
            assert result["build_summary"]["wheel_count"] == 1
            assert result["build_summary"]["source_count"] == 1
            assert result["build_status"]["has_wheels"] is True
            assert result["build_status"]["has_source"] is True

    @pytest.mark.asyncio
    async def test_analyze_source_only_package(self):
        """Test analysis of a package with only a source distribution."""
        mock_package_data = {
            "info": {"name": "source-only", "version": "1.0.0"},
            "releases": {
                "1.0.0": [
                    {
                        "filename": "source-only-1.0.0.tar.gz",
                        "packagetype": "sdist",
                        "size": 20000,
                        "upload_time_iso_8601": "2023-01-01T00:00:00Z",
                        "python_version": "source",
                    }
                ]
            },
            "urls": [],
        }

        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data

            result = await get_pypi_build_logs("source-only")

            assert result["build_status"]["has_wheels"] is False
            assert result["build_status"]["has_source"] is True
            assert any("No wheel distributions" in warning for warning in result["issues"]["warnings"])

    @pytest.mark.asyncio
    async def test_package_not_found_build_logs(self):
        """Test build logs for a non-existent package."""
        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = PackageNotFoundError("nonexistent")

            with pytest.raises(PackageNotFoundError):
                await get_pypi_build_logs("nonexistent")

    @pytest.mark.asyncio
    async def test_platform_filtering(self):
        """Test platform-specific filtering of build logs."""
        mock_package_data = {
            "info": {"name": "multi-platform", "version": "1.0.0"},
            "releases": {
                "1.0.0": [
                    {
                        "filename": "multi_platform-1.0.0-py3-none-win_amd64.whl",
                        "packagetype": "bdist_wheel",
                        "size": 10000,
                        "python_version": "py3",
                    },
                    {
                        "filename": "multi_platform-1.0.0-py3-none-linux_x86_64.whl",
                        "packagetype": "bdist_wheel",
                        "size": 10000,
                        "python_version": "py3",
                    },
                ]
            },
            "urls": [],
        }

        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data

            # Test Windows filtering
            result = await get_pypi_build_logs("multi-platform", platform="windows")

            # Should only include Windows wheels
            windows_wheels = [w for w in result["distributions"]["wheels"] if "win" in w.get("platform", "")]
            assert len(windows_wheels) > 0


class TestWheelFilenameAnalysis:
    """Test wheel filename analysis."""

    def test_universal_wheel_analysis(self):
        """Test analysis of a universal wheel filename."""
        result = _analyze_wheel_filename("mypackage-1.0.0-py2.py3-none-any.whl")

        assert result["wheel_type"] == "universal"
        assert result["platform"] == "any"
        assert result["python_implementation"] == "universal"

    def test_platform_specific_wheel_analysis(self):
        """Test analysis of a platform-specific wheel filename."""
        result = _analyze_wheel_filename("mypackage-1.0.0-cp38-cp38-win_amd64.whl")

        assert result["wheel_type"] == "platform_specific"
        assert result["platform"] == "windows"
        assert result["python_implementation"] == "cpython"
        assert result["architecture"] == "x86_64"

    def test_linux_wheel_analysis(self):
        """Test analysis of a Linux wheel filename."""
        result = _analyze_wheel_filename("mypackage-1.0.0-cp39-cp39-linux_x86_64.whl")

        assert result["platform"] == "linux"
        assert result["architecture"] == "x86_64"

    def test_macos_wheel_analysis(self):
        """Test analysis of a macOS wheel filename."""
        result = _analyze_wheel_filename("mypackage-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl")

        assert result["platform"] == "macos"
        assert result["architecture"] == "x86_64"
|
|
||||||
|
|
||||||
|
|
||||||
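# For context: PEP 427 wheel filenames follow
# {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl,
# so the fields asserted above are recoverable from the last three dash-separated
# parts of the stem. A minimal sketch of the parsing step only (not the real
# _analyze_wheel_filename, whose mapping of raw tags to values like "windows",
# "cpython", and "x86_64" is inferred from the assertions, not shown in this diff):
#
#     def _sketch_wheel_tags(filename: str) -> tuple[str, str, str]:
#         stem = filename[: -len(".whl")]
#         python_tag, abi_tag, platform_tag = stem.split("-")[-3:]
#         return python_tag, abi_tag, platform_tag  # e.g. ("cp38", "cp38", "win_amd64")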
class TestBuildQualityAnalysis:
    """Test build quality analysis."""

    def test_high_quality_build_analysis(self):
        """Test analysis of high-quality builds."""
        distributions = {
            "wheels": [
                {"platform": "windows", "size_bytes": 1000000, "python_version": "py3"},
                {"platform": "linux", "size_bytes": 1000000, "python_version": "py3"},
                {"platform": "macos", "size_bytes": 1000000, "python_version": "py3"},
            ],
            "source": [{"size_bytes": 500000}],
        }

        result = _analyze_build_quality(distributions, {})

        assert result["health_status"] in ["good", "excellent"]
        assert result["platform_coverage"] == 3
        assert len(result["health_issues"]) == 0

    def test_poor_quality_build_analysis(self):
        """Test analysis of poor-quality builds."""
        distributions = {
            "wheels": [],  # No wheels
            "source": [],  # No source
        }

        result = _analyze_build_quality(distributions, {})

        assert result["health_status"] == "poor"
        assert len(result["health_issues"]) > 0


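# The two cases above only pin down the extremes of _analyze_build_quality's
# scoring. One heuristic consistent with both tests (purely illustrative; the
# real logic is not shown in this diff):
#
#     platforms = {w["platform"] for w in distributions["wheels"]}
#     issues = []
#     if not distributions["wheels"]:
#         issues.append("no wheel distributions")
#     if not distributions["source"]:
#         issues.append("no source distribution")
#     health = "poor" if issues else ("excellent" if len(platforms) >= 3 else "good")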
class TestUtilityFunctions:
    """Test utility functions."""

    def test_generate_html_preview(self):
        """Test HTML preview generation."""
        sections = {
            "header": {
                "name": "test-package",
                "version": "1.0.0",
                "summary": "A test package",
                "author": "Test Author",
            },
            "metadata": {
                "license": "MIT",
                "home_page": "https://github.com/test/package",
                "keywords": ["test"],
                "classifiers": ["Development Status :: 4 - Beta"],
            },
            "description": {
                "content": "Test description",
            }
        }

        html = _generate_html_preview(sections)

        assert "test-package" in html
        assert "1.0.0" in html
        assert "A test package" in html
        assert "Test Author" in html
        assert "MIT" in html

    def test_generate_next_steps(self):
        """Test next steps generation."""
        errors = ["Missing required field: name"]
        warnings = ["Author email is recommended"]
        suggestions = ["Consider adding keywords"]

        steps = _generate_next_steps(errors, warnings, suggestions, False)

        assert len(steps) > 0
        assert any("Fix critical errors" in step for step in steps)

        # Test with upload ready
        steps_ready = _generate_next_steps([], warnings, suggestions, True)
        assert any("Ready for upload" in step for step in steps_ready)


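# Inferred from the usage above: the fourth positional argument to
# _generate_next_steps is presumably an "upload ready" flag. With errors
# present and the flag False, the steps should include a "Fix critical errors"
# item; with no errors and the flag True, a "Ready for upload" item.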
class TestErrorHandling:
    """Test error handling in workflow functions."""

    @pytest.mark.asyncio
    async def test_workflow_error_handling(self):
        """Test custom workflow error handling."""
        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.side_effect = Exception("Unexpected error")

            with pytest.raises(PyPIWorkflowError) as exc_info:
                await validate_pypi_package_name("test-package")

            assert "validate_name" in str(exc_info.value.operation)

    @pytest.mark.asyncio
    async def test_network_error_propagation(self):
        """Test that network errors are wrapped and surfaced as PyPIWorkflowError."""
        with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = NetworkError("Network down")

            with pytest.raises(PyPIWorkflowError):
                await get_pypi_build_logs("test-package")


if __name__ == "__main__":
    pytest.main([__file__])