Compare commits


No commits in common. "43f36b60fbe35c02cf82319f339817e92c0b6f4b" and "9924df34ec1107a44893a33d5e8559517ddd1b15" have entirely different histories.

21 changed files with 546 additions and 11391 deletions

poetry.lock generated

@@ -691,21 +691,6 @@ rich = ">=13.9.4"
 [package.extras]
 websockets = ["websockets (>=15.0.1)"]

-[[package]]
-name = "feedparser"
-version = "6.0.11"
-description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
-optional = false
-python-versions = ">=3.6"
-groups = ["main"]
-files = [
-    {file = "feedparser-6.0.11-py3-none-any.whl", hash = "sha256:0be7ee7b395572b19ebeb1d6aafb0028dee11169f1c934e0ed67d54992f4ad45"},
-    {file = "feedparser-6.0.11.tar.gz", hash = "sha256:c9d0407b64c6f2a065d0ebb292c2b35c01050cc0dc33757461aaabdc4c4184d5"},
-]
-
-[package.dependencies]
-sgmllib3k = "*"
-
 [[package]]
 name = "filelock"
 version = "3.19.1"
@@ -2009,17 +1994,6 @@ files = [
     {file = "ruff-0.12.9.tar.gz", hash = "sha256:fbd94b2e3c623f659962934e52c2bea6fc6da11f667a427a368adaf3af2c866a"},
 ]

-[[package]]
-name = "sgmllib3k"
-version = "1.0.0"
-description = "Py3k port of sgmllib."
-optional = false
-python-versions = "*"
-groups = ["main"]
-files = [
-    {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
-]
-
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -2276,4 +2250,4 @@ watchdog = ["watchdog (>=2.3)"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.10"
-content-hash = "13bc4176d567d6738ca9ca5ebd67565f8526853434911137f4b51b39e275a546"
+content-hash = "9785e18d2d996f5e58e1b06c722f6de31c445a1a83528f39227d1c373b91f989"


@@ -8,10 +8,6 @@ __version__ = "0.1.0"
 __author__ = "Hal"
 __email__ = "hal.long@outlook.com"

-try:
-    from pypi_query_mcp.server import mcp
-    __all__ = ["mcp", "__version__"]
-except ImportError:
-    # Server dependencies not available (fastmcp, etc.)
-    # Tools can still be imported individually
-    __all__ = ["__version__"]
+from pypi_query_mcp.server import mcp
+
+__all__ = ["mcp", "__version__"]


@@ -126,42 +126,20 @@ class PyPISearchClient:
         try:
             # Use PyPI's search API as the primary source
-            try:
-                pypi_results = await self._search_pypi_api(query, limit * 3)  # Get more for filtering
-                logger.info(f"Got {len(pypi_results)} raw results from PyPI API")
-            except Exception as e:
-                logger.error(f"PyPI API search failed: {e}")
-                pypi_results = []
+            pypi_results = await self._search_pypi_api(query, limit * 3)  # Get more for filtering

             # Enhance results with additional metadata
-            try:
-                enhanced_results = await self._enhance_search_results(pypi_results)
-                logger.info(f"Enhanced to {len(enhanced_results)} results")
-            except Exception as e:
-                logger.error(f"Enhancement failed: {e}")
-                enhanced_results = pypi_results
+            enhanced_results = await self._enhance_search_results(pypi_results)

             # Apply filters
-            try:
-                filtered_results = self._apply_filters(enhanced_results, filters)
-                logger.info(f"Filtered to {len(filtered_results)} results")
-            except Exception as e:
-                logger.error(f"Filtering failed: {e}")
-                filtered_results = enhanced_results
+            filtered_results = self._apply_filters(enhanced_results, filters)

             # Apply semantic search if requested
             if semantic_search:
-                try:
-                    filtered_results = self._apply_semantic_search(filtered_results, query)
-                except Exception as e:
-                    logger.error(f"Semantic search failed: {e}")
+                filtered_results = self._apply_semantic_search(filtered_results, query)

             # Sort results
-            try:
-                sorted_results = self._sort_results(filtered_results, sort)
-            except Exception as e:
-                logger.error(f"Sorting failed: {e}")
-                sorted_results = filtered_results
+            sorted_results = self._sort_results(filtered_results, sort)

             # Limit results
             final_results = sorted_results[:limit]
@@ -183,318 +161,72 @@ class PyPISearchClient:
raise SearchError(f"Search failed: {e}") from e raise SearchError(f"Search failed: {e}") from e
async def _search_pypi_api(self, query: str, limit: int) -> List[Dict[str, Any]]: async def _search_pypi_api(self, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search using available PyPI methods - no native search API exists.""" """Search using PyPI's official search API."""
logger.info(f"PyPI has no native search API, using curated search for: '{query}'") url = "https://pypi.org/search/"
params = {
"q": query,
"page": 1,
}
# PyPI doesn't have a search API, so we'll use our curated approach async with httpx.AsyncClient(timeout=self.timeout) as client:
# combined with direct package lookups for exact matches
results = []
# First: try direct package lookup (exact match)
try:
direct_result = await self._try_direct_package_lookup(query)
if direct_result:
results.extend(direct_result)
except Exception as e:
logger.debug(f"Direct lookup failed: {e}")
# Second: search curated packages
try:
curated_results = await self._search_curated_packages(query, limit)
# Add curated results that aren't already in the list
existing_names = {r["name"].lower() for r in results}
for result in curated_results:
if result["name"].lower() not in existing_names:
results.append(result)
except Exception as e:
logger.error(f"Curated search failed: {e}")
return results[:limit]
async def _try_direct_package_lookup(self, query: str) -> List[Dict[str, Any]]:
"""Try to get package info directly using PyPI JSON API."""
candidates = [
query.strip(),
query.strip().lower(),
query.strip().replace(" ", "-"),
query.strip().replace(" ", "_"),
query.strip().replace("_", "-"),
query.strip().replace("-", "_"),
]
results = []
for candidate in candidates:
try: try:
async with PyPIClient() as client: response = await client.get(url, params=params)
package_data = await client.get_package_info(candidate) response.raise_for_status()
results.append({
"name": package_data["info"]["name"],
"summary": package_data["info"]["summary"] or "",
"version": package_data["info"]["version"],
"source": "direct_api",
"description": package_data["info"]["description"] or "",
"author": package_data["info"]["author"] or "",
"license": package_data["info"]["license"] or "",
"home_page": package_data["info"]["home_page"] or "",
"requires_python": package_data["info"]["requires_python"] or "",
"classifiers": package_data["info"]["classifiers"] or [],
"keywords": package_data["info"]["keywords"] or "",
})
break # Found exact match, stop looking
except Exception:
continue # Try next candidate
return results # Parse the HTML response (PyPI search returns HTML)
return await self._parse_search_html(response.text, limit)
async def _search_curated_packages(self, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search our curated package database.""" except httpx.HTTPError as e:
from ..data.popular_packages import ALL_POPULAR_PACKAGES logger.error(f"PyPI search API error: {e}")
# Fallback to alternative search method
curated_matches = [] return await self._fallback_search(query, limit)
query_lower = query.lower()
logger.info(f"Searching {len(ALL_POPULAR_PACKAGES)} curated packages for '{query}'")
# First: exact name matches
for pkg in ALL_POPULAR_PACKAGES:
if query_lower == pkg.name.lower():
curated_matches.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_exact",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
"primary_use_case": pkg.primary_use_case,
})
# Second: name contains query (if not too many exact matches)
if len(curated_matches) < limit:
for pkg in ALL_POPULAR_PACKAGES:
if (query_lower in pkg.name.lower() and
pkg.name not in [m["name"] for m in curated_matches]):
curated_matches.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_name",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
"primary_use_case": pkg.primary_use_case,
})
# Third: description or use case matches (if still need more results)
if len(curated_matches) < limit:
for pkg in ALL_POPULAR_PACKAGES:
if ((query_lower in pkg.description.lower() or
query_lower in pkg.primary_use_case.lower()) and
pkg.name not in [m["name"] for m in curated_matches]):
curated_matches.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_desc",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
"primary_use_case": pkg.primary_use_case,
})
# Sort by popularity (downloads)
curated_matches.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
logger.info(f"Found {len(curated_matches)} curated matches")
return curated_matches[:limit]
async def _fallback_search(self, query: str, limit: int) -> List[Dict[str, Any]]: async def _fallback_search(self, query: str, limit: int) -> List[Dict[str, Any]]:
"""Fallback search using PyPI JSON API and our curated data.""" """Fallback search using PyPI JSON API and our curated data."""
from ..data.popular_packages import PACKAGES_BY_NAME, get_popular_packages
# Search in our curated packages first
curated_matches = []
query_lower = query.lower()
for package_info in get_popular_packages(limit=1000):
name_match = query_lower in package_info.name.lower()
desc_match = query_lower in package_info.description.lower()
if name_match or desc_match:
curated_matches.append({
"name": package_info.name,
"summary": package_info.description,
"version": "unknown",
"source": "curated",
"category": package_info.category,
"estimated_downloads": package_info.estimated_monthly_downloads,
})
# If we have some matches, return them
if curated_matches:
return curated_matches[:limit]
# Last resort: try simple package name search
try: try:
from ..data.popular_packages import PACKAGES_BY_NAME, get_popular_packages, ALL_POPULAR_PACKAGES async with PyPIClient() as client:
# Try to get the package directly if it's an exact match
# Search in our curated packages first try:
curated_matches = []
query_lower = query.lower()
logger.info(f"Searching in {len(ALL_POPULAR_PACKAGES)} curated packages for '{query}'")
# First: exact name matches
for package_info in ALL_POPULAR_PACKAGES:
if query_lower == package_info.name.lower():
curated_matches.append({
"name": package_info.name,
"summary": package_info.description,
"version": "latest",
"source": "curated_exact",
"category": package_info.category,
"estimated_downloads": package_info.estimated_monthly_downloads,
"github_stars": package_info.github_stars,
})
# Second: name contains query
for package_info in ALL_POPULAR_PACKAGES:
if (query_lower in package_info.name.lower() and
package_info.name not in [m["name"] for m in curated_matches]):
curated_matches.append({
"name": package_info.name,
"summary": package_info.description,
"version": "latest",
"source": "curated_name",
"category": package_info.category,
"estimated_downloads": package_info.estimated_monthly_downloads,
"github_stars": package_info.github_stars,
})
# Third: description or use case matches
for package_info in ALL_POPULAR_PACKAGES:
if ((query_lower in package_info.description.lower() or
query_lower in package_info.primary_use_case.lower()) and
package_info.name not in [m["name"] for m in curated_matches]):
curated_matches.append({
"name": package_info.name,
"summary": package_info.description,
"version": "latest",
"source": "curated_desc",
"category": package_info.category,
"estimated_downloads": package_info.estimated_monthly_downloads,
"github_stars": package_info.github_stars,
})
logger.info(f"Found {len(curated_matches)} curated matches")
# If we have some matches, return them (sorted by popularity)
if curated_matches:
curated_matches.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
return curated_matches[:limit]
# Last resort: try direct package lookup
logger.info("No curated matches, trying direct package lookup")
try:
async with PyPIClient() as client:
package_data = await client.get_package_info(query) package_data = await client.get_package_info(query)
return [{ return [{
"name": package_data["info"]["name"], "name": package_data["info"]["name"],
"summary": package_data["info"]["summary"] or "", "summary": package_data["info"]["summary"] or "",
"version": package_data["info"]["version"], "version": package_data["info"]["version"],
"source": "direct_fallback", "source": "direct",
"description": package_data["info"]["description"] or "",
"author": package_data["info"]["author"] or "",
}] }]
except Exception as e: except:
logger.info(f"Direct lookup failed: {e}") pass
except Exception as e: except Exception as e:
logger.error(f"Fallback search failed: {e}") logger.warning(f"Fallback search failed: {e}")
return [] return []
async def _search_xmlrpc(self, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search using enhanced curated search with fuzzy matching."""
# Since PyPI XML-RPC search is deprecated, use our enhanced curated search
try:
from ..data.popular_packages import get_popular_packages, ALL_POPULAR_PACKAGES
query_lower = query.lower()
results = []
# First pass: exact name matches
for pkg in ALL_POPULAR_PACKAGES:
if query_lower == pkg.name.lower():
results.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_exact",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
})
# Second pass: name contains query
for pkg in ALL_POPULAR_PACKAGES:
if query_lower in pkg.name.lower() and pkg.name not in [r["name"] for r in results]:
results.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_name",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
})
# Third pass: description contains query
for pkg in ALL_POPULAR_PACKAGES:
if (query_lower in pkg.description.lower() or
query_lower in pkg.primary_use_case.lower()) and pkg.name not in [r["name"] for r in results]:
results.append({
"name": pkg.name,
"summary": pkg.description,
"version": "latest",
"source": "curated_desc",
"category": pkg.category,
"estimated_downloads": pkg.estimated_monthly_downloads,
"github_stars": pkg.github_stars,
})
# Sort by popularity (downloads)
results.sort(key=lambda x: x.get("estimated_downloads", 0), reverse=True)
return results[:limit]
except Exception as e:
logger.debug(f"Enhanced curated search error: {e}")
return []
async def _search_simple_api(self, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search using direct PyPI JSON API for specific packages."""
try:
# Try direct package lookup if query looks like a package name
query_clean = query.strip().lower().replace(" ", "-")
# Try variations of the query as package names
candidates = [
query_clean,
query_clean.replace("-", "_"),
query_clean.replace("_", "-"),
query.strip(), # Original query
]
results = []
for candidate in candidates:
if len(results) >= limit:
break
try:
async with PyPIClient() as client:
package_data = await client.get_package_info(candidate)
results.append({
"name": package_data["info"]["name"],
"summary": package_data["info"]["summary"] or "",
"version": package_data["info"]["version"],
"source": "direct_api",
"description": package_data["info"]["description"] or "",
"author": package_data["info"]["author"] or "",
"license": package_data["info"]["license"] or "",
})
except Exception:
# Package doesn't exist, continue to next candidate
continue
return results
except Exception as e:
logger.debug(f"Simple API search error: {e}")
return []
async def _parse_search_html(self, html: str, limit: int) -> List[Dict[str, Any]]: async def _parse_search_html(self, html: str, limit: int) -> List[Dict[str, Any]]:
"""Parse PyPI search results from HTML (simplified parser).""" """Parse PyPI search results from HTML (simplified parser)."""
# This is a simplified parser - in production, you'd use BeautifulSoup # This is a simplified parser - in production, you'd use BeautifulSoup
@@ -505,19 +237,9 @@
         """Enhance search results with additional metadata from PyPI API."""
         enhanced = []

-        # Skip enhancement if results already have good metadata from curated source
-        if results and results[0].get("source", "").startswith("curated"):
-            logger.info("Using curated results without enhancement")
-            return results
-
-        # For direct API results, they're already enhanced
-        if results and results[0].get("source") == "direct_api":
-            logger.info("Using direct API results without additional enhancement")
-            return results
-
-        # Process in small batches to avoid overwhelming the API
-        batch_size = 3
-        for i in range(0, min(len(results), 10), batch_size):  # Limit to first 10 results
+        # Process in batches to avoid overwhelming the API
+        batch_size = 5
+        for i in range(0, len(results), batch_size):
             batch = results[i:i + batch_size]
             batch_tasks = [
                 self._enhance_single_result(result)

File diff suppressed because it is too large


@@ -21,6 +21,12 @@ from .package_query import (
     query_package_info,
     query_package_versions,
 )
+from .metadata import (
+    manage_package_keywords,
+    manage_package_urls,
+    set_package_visibility,
+    update_package_metadata,
+)
 from .publishing import (
     check_pypi_credentials,
     delete_pypi_release,
@@ -29,11 +35,11 @@ from .publishing import (
     manage_pypi_maintainers,
     upload_package_to_pypi,
 )
-from .metadata import (
-    manage_package_keywords,
-    manage_package_urls,
-    set_package_visibility,
-    update_package_metadata,
+from .search import (
+    find_alternatives,
+    get_trending_packages,
+    search_by_category,
+    search_packages,
 )
 from .analytics import (
     analyze_pypi_competition,
@@ -41,48 +47,8 @@ from .analytics import (
     get_pypi_package_rankings,
     get_pypi_security_alerts,
 )
-from .discovery import (
-    get_pypi_package_recommendations,
-    get_pypi_trending_today,
-    monitor_pypi_new_releases,
-    search_pypi_by_maintainer,
-)
-from .workflow import (
-    check_pypi_upload_requirements,
-    get_pypi_build_logs,
-    preview_pypi_package_page,
-    validate_pypi_package_name,
-)
-from .community import (
-    get_pypi_package_reviews,
-    manage_pypi_package_discussions,
-    get_pypi_maintainer_contacts,
-)
-from .search import (
-    find_alternatives,
-    get_trending_packages,
-    search_by_category,
-    search_packages,
-)
-from .security_tools import (
-    bulk_scan_package_security,
-    scan_pypi_package_security,
-)
-from .license_tools import (
-    analyze_pypi_package_license,
-    check_bulk_license_compliance,
-)
-from .health_tools import (
-    assess_package_health_score,
-    compare_packages_health_scores,
-)
-from .requirements_tools import (
-    analyze_requirements_file_tool,
-    compare_multiple_requirements_files,
-)

 __all__ = [
-    # Core package tools
     "query_package_info",
     "query_package_versions",
     "query_package_dependencies",
@@ -94,52 +60,22 @@ __all__ = [
     "get_package_download_stats",
     "get_package_download_trends",
     "get_top_packages_by_downloads",
-    # Search tools
     "search_packages",
     "search_by_category",
     "find_alternatives",
     "get_trending_packages",
-    # Publishing tools
     "upload_package_to_pypi",
     "check_pypi_credentials",
     "get_pypi_upload_history",
     "delete_pypi_release",
     "manage_pypi_maintainers",
     "get_pypi_account_info",
-    # Metadata tools
     "update_package_metadata",
     "manage_package_urls",
     "set_package_visibility",
     "manage_package_keywords",
-    # Analytics tools
     "get_pypi_package_analytics",
     "get_pypi_security_alerts",
     "get_pypi_package_rankings",
     "analyze_pypi_competition",
-    # Discovery tools
-    "monitor_pypi_new_releases",
-    "get_pypi_trending_today",
-    "search_pypi_by_maintainer",
-    "get_pypi_package_recommendations",
-    # Workflow tools
-    "validate_pypi_package_name",
-    "preview_pypi_package_page",
-    "check_pypi_upload_requirements",
-    "get_pypi_build_logs",
-    # Community tools
-    "get_pypi_package_reviews",
-    "manage_pypi_package_discussions",
-    "get_pypi_maintainer_contacts",
-    # Security tools
-    "scan_pypi_package_security",
-    "bulk_scan_package_security",
-    # License tools
-    "analyze_pypi_package_license",
-    "check_bulk_license_compliance",
-    # Health tools
-    "assess_package_health_score",
-    "compare_packages_health_scores",
-    # Requirements tools
-    "analyze_requirements_file_tool",
-    "compare_multiple_requirements_files",
 ]

File diff suppressed because it is too large


@@ -39,7 +39,7 @@ async def check_python_compatibility(

     try:
         async with PyPIClient() as client:
-            package_data = await client.get_package_info(package_name, use_cache=use_cache)
+            package_data = await client.get_package_info(package_name, use_cache)

             info = package_data.get("info", {})
             requires_python = info.get("requires_python")
@@ -103,7 +103,7 @@ async def get_compatible_python_versions(

     try:
         async with PyPIClient() as client:
-            package_data = await client.get_package_info(package_name, use_cache=use_cache)
+            package_data = await client.get_package_info(package_name, use_cache)

             info = package_data.get("info", {})
             requires_python = info.get("requires_python")
@@ -177,7 +177,7 @@ async def suggest_python_version_for_packages(
         async with PyPIClient() as client:
             for package_name in package_names:
                 try:
-                    package_data = await client.get_package_info(package_name, use_cache=use_cache)
+                    package_data = await client.get_package_info(package_name, use_cache)

                     info = package_data.get("info", {})
                     requires_python = info.get("requires_python")

File diff suppressed because it is too large


@@ -1,974 +0,0 @@
"""Package health scoring and quality assessment tools for PyPI packages."""
import asyncio
import logging
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import httpx
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..core.pypi_client import PyPIClient
logger = logging.getLogger(__name__)
class PackageHealthScorer:
"""Comprehensive health and quality scorer for PyPI packages."""
def __init__(self):
self.timeout = 30.0
# Health scoring weights (total = 100)
self.weights = {
"maintenance": 25, # Maintenance indicators
"popularity": 20, # Download stats, stars, usage
"documentation": 15, # Documentation quality
"testing": 15, # Testing and CI indicators
"security": 10, # Security practices
"compatibility": 10, # Python version support
"metadata": 5, # Metadata completeness
}
# Quality metrics thresholds
self.thresholds = {
"downloads_monthly_excellent": 1000000,
"downloads_monthly_good": 100000,
"downloads_monthly_fair": 10000,
"version_age_days_fresh": 90,
"version_age_days_good": 365,
"version_age_days_stale": 730,
"python_versions_excellent": 4,
"python_versions_good": 3,
"python_versions_fair": 2,
}
async def assess_package_health(
self,
package_name: str,
version: Optional[str] = None,
include_github_metrics: bool = True
) -> Dict[str, Any]:
"""
Assess comprehensive health and quality of a PyPI package.
Args:
package_name: Name of the package to assess
version: Specific version to assess (optional)
include_github_metrics: Whether to fetch GitHub repository metrics
Returns:
Dictionary containing health assessment results
"""
logger.info(f"Starting health assessment for package: {package_name}")
try:
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name, version)
package_version = version or package_data["info"]["version"]
# Run parallel health assessments
assessment_tasks = [
self._assess_maintenance_health(package_data),
self._assess_popularity_metrics(package_data),
self._assess_documentation_quality(package_data),
self._assess_testing_indicators(package_data),
self._assess_security_practices(package_data),
self._assess_compatibility_support(package_data),
self._assess_metadata_completeness(package_data),
]
if include_github_metrics:
github_url = self._extract_github_url(package_data)
if github_url:
assessment_tasks.append(self._fetch_github_metrics(github_url))
else:
assessment_tasks.append(asyncio.create_task(self._empty_github_metrics()))
else:
assessment_tasks.append(asyncio.create_task(self._empty_github_metrics()))
results = await asyncio.gather(*assessment_tasks, return_exceptions=True)
# Unpack results
(maintenance, popularity, documentation, testing,
security, compatibility, metadata, github_metrics) = results
# Handle exceptions
if isinstance(github_metrics, Exception):
github_metrics = self._empty_github_metrics()
# Calculate overall health score
health_scores = {
"maintenance": maintenance.get("score", 0) if not isinstance(maintenance, Exception) else 0,
"popularity": popularity.get("score", 0) if not isinstance(popularity, Exception) else 0,
"documentation": documentation.get("score", 0) if not isinstance(documentation, Exception) else 0,
"testing": testing.get("score", 0) if not isinstance(testing, Exception) else 0,
"security": security.get("score", 0) if not isinstance(security, Exception) else 0,
"compatibility": compatibility.get("score", 0) if not isinstance(compatibility, Exception) else 0,
"metadata": metadata.get("score", 0) if not isinstance(metadata, Exception) else 0,
}
overall_score = sum(
health_scores[category] * (self.weights[category] / 100)
for category in health_scores
)
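# Worked example of the weighted sum above, using hypothetical category scores
# (not from any real package): maintenance=80, popularity=60, documentation=70,
# testing=50, security=90, compatibility=40, metadata=100 gives
# 80*0.25 + 60*0.20 + 70*0.15 + 50*0.15 + 90*0.10 + 40*0.10 + 100*0.05
# = 20 + 12 + 10.5 + 7.5 + 9 + 4 + 5 = 68.0, which _calculate_health_level maps to "fair".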
health_level = self._calculate_health_level(overall_score)
# Generate recommendations
recommendations = self._generate_health_recommendations(
health_scores, maintenance, popularity, documentation,
testing, security, compatibility, metadata, github_metrics
)
return {
"package": package_name,
"version": package_version,
"assessment_timestamp": datetime.now(timezone.utc).isoformat(),
"overall_health": {
"score": round(overall_score, 2),
"level": health_level,
"max_score": 100,
},
"category_scores": health_scores,
"detailed_assessment": {
"maintenance": maintenance if not isinstance(maintenance, Exception) else {"score": 0, "indicators": [], "issues": [str(maintenance)]},
"popularity": popularity if not isinstance(popularity, Exception) else {"score": 0, "metrics": {}, "issues": [str(popularity)]},
"documentation": documentation if not isinstance(documentation, Exception) else {"score": 0, "indicators": [], "issues": [str(documentation)]},
"testing": testing if not isinstance(testing, Exception) else {"score": 0, "indicators": [], "issues": [str(testing)]},
"security": security if not isinstance(security, Exception) else {"score": 0, "practices": [], "issues": [str(security)]},
"compatibility": compatibility if not isinstance(compatibility, Exception) else {"score": 0, "support": [], "issues": [str(compatibility)]},
"metadata": metadata if not isinstance(metadata, Exception) else {"score": 0, "completeness": {}, "issues": [str(metadata)]},
"github_metrics": github_metrics,
},
"recommendations": recommendations,
"health_summary": {
"strengths": self._identify_strengths(health_scores),
"weaknesses": self._identify_weaknesses(health_scores),
"improvement_priority": self._prioritize_improvements(health_scores),
}
}
except Exception as e:
logger.error(f"Health assessment failed for {package_name}: {e}")
raise SearchError(f"Health assessment failed: {e}") from e
async def _assess_maintenance_health(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess package maintenance health indicators."""
info = package_data.get("info", {})
releases = package_data.get("releases", {})
score = 0
indicators = []
issues = []
# Check release frequency
if releases:
release_dates = []
for version_releases in releases.values():
for release in version_releases:
upload_time = release.get("upload_time_iso_8601")
if upload_time:
try:
release_dates.append(datetime.fromisoformat(upload_time.replace('Z', '+00:00')))
except:
pass
if release_dates:
release_dates.sort(reverse=True)
latest_release = release_dates[0]
days_since_release = (datetime.now(timezone.utc) - latest_release).days
if days_since_release <= self.thresholds["version_age_days_fresh"]:
score += 25
indicators.append(f"Recent release ({days_since_release} days ago)")
elif days_since_release <= self.thresholds["version_age_days_good"]:
score += 20
indicators.append(f"Moderately recent release ({days_since_release} days ago)")
elif days_since_release <= self.thresholds["version_age_days_stale"]:
score += 10
indicators.append(f"Older release ({days_since_release} days ago)")
else:
issues.append(f"Very old release ({days_since_release} days ago)")
# Check release consistency (last 5 releases)
if len(release_dates) >= 5:
recent_releases = release_dates[:5]
intervals = []
for i in range(len(recent_releases) - 1):
interval = (recent_releases[i] - recent_releases[i + 1]).days
intervals.append(interval)
avg_interval = sum(intervals) / len(intervals)
if avg_interval <= 180: # Releases every 6 months or less
score += 15
indicators.append(f"Regular releases (avg {avg_interval:.0f} days)")
elif avg_interval <= 365:
score += 10
indicators.append(f"Periodic releases (avg {avg_interval:.0f} days)")
else:
issues.append(f"Infrequent releases (avg {avg_interval:.0f} days)")
else:
issues.append("No release history available")
# Check for development indicators
if "dev" in info.get("version", "").lower() or "alpha" in info.get("version", "").lower():
issues.append("Development/alpha version")
elif "beta" in info.get("version", "").lower():
score += 5
indicators.append("Beta version (active development)")
else:
score += 10
indicators.append("Stable version")
# Check for author/maintainer info
if info.get("author") or info.get("maintainer"):
score += 10
indicators.append("Active maintainer information")
else:
issues.append("No maintainer information")
return {
"score": min(score, 100),
"indicators": indicators,
"issues": issues,
"metrics": {
"days_since_last_release": days_since_release if 'days_since_release' in locals() else None,
"total_releases": len(releases),
}
}
async def _assess_popularity_metrics(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess package popularity and usage metrics."""
info = package_data.get("info", {})
score = 0
metrics = {}
# Estimate download popularity (since we don't have direct access)
# Use proxy indicators: project URLs, description length, classifiers
# Check for GitHub stars indicator
project_urls = info.get("project_urls", {}) or {}
github_url = None
for key, url in project_urls.items():
if "github.com" in (url or "").lower():
github_url = url
break
if not github_url:
home_page = info.get("home_page", "")
if "github.com" in home_page:
github_url = home_page
if github_url:
score += 15
metrics["has_github_repo"] = True
else:
metrics["has_github_repo"] = False
# Check description quality as popularity indicator
description = info.get("description", "") or ""
summary = info.get("summary", "") or ""
if len(description) > 1000:
score += 20
metrics["description_quality"] = "excellent"
elif len(description) > 500:
score += 15
metrics["description_quality"] = "good"
elif len(description) > 100:
score += 10
metrics["description_quality"] = "fair"
else:
metrics["description_quality"] = "poor"
# Check for comprehensive metadata (popularity indicator)
if info.get("keywords"):
score += 10
if len(info.get("classifiers", [])) > 5:
score += 15
if info.get("project_urls") and len(info.get("project_urls", {})) > 2:
score += 10
# Check for documentation links
docs_indicators = ["documentation", "docs", "readthedocs", "github.io"]
has_docs = any(
any(indicator in (url or "").lower() for indicator in docs_indicators)
for url in project_urls.values()
)
if has_docs:
score += 15
metrics["has_documentation"] = True
else:
metrics["has_documentation"] = False
# Check for community indicators
community_urls = ["issues", "bug", "tracker", "discussion", "forum"]
has_community = any(
any(indicator in key.lower() for indicator in community_urls)
for key in project_urls.keys()
)
if has_community:
score += 15
metrics["has_community_links"] = True
else:
metrics["has_community_links"] = False
return {
"score": min(score, 100),
"metrics": metrics,
}
async def _assess_documentation_quality(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess documentation quality indicators."""
info = package_data.get("info", {})
score = 0
indicators = []
issues = []
# Check description completeness
description = info.get("description", "") or ""
summary = info.get("summary", "") or ""
if len(description) > 2000:
score += 30
indicators.append("Comprehensive description")
elif len(description) > 1000:
score += 25
indicators.append("Good description length")
elif len(description) > 500:
score += 15
indicators.append("Adequate description")
elif len(description) > 100:
score += 10
indicators.append("Basic description")
else:
issues.append("Very short or missing description")
# Check for README indicators in description
readme_indicators = ["## ", "### ", "```", "# Installation", "# Usage", "# Examples"]
if any(indicator in description for indicator in readme_indicators):
score += 20
indicators.append("Structured documentation (README-style)")
# Check for documentation URLs
project_urls = info.get("project_urls", {}) or {}
docs_urls = []
for key, url in project_urls.items():
if any(term in key.lower() for term in ["doc", "guide", "manual", "wiki"]):
docs_urls.append(url)
if docs_urls:
score += 25
indicators.append(f"Documentation links ({len(docs_urls)} found)")
else:
issues.append("No dedicated documentation links")
# Check for example code in description
if "```" in description or " " in description: # Code blocks
score += 15
indicators.append("Contains code examples")
# Check for installation instructions
install_keywords = ["install", "pip install", "setup.py", "requirements"]
if any(keyword in description.lower() for keyword in install_keywords):
score += 10
indicators.append("Installation instructions provided")
else:
issues.append("No clear installation instructions")
return {
"score": min(score, 100),
"indicators": indicators,
"issues": issues,
}
async def _assess_testing_indicators(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess testing and CI/CD indicators."""
info = package_data.get("info", {})
score = 0
indicators = []
issues = []
# Check for testing-related classifiers
classifiers = info.get("classifiers", [])
testing_classifiers = [c for c in classifiers if "testing" in c.lower()]
if testing_classifiers:
score += 15
indicators.append("Testing framework classifiers")
# Check for CI/CD indicators in URLs
project_urls = info.get("project_urls", {}) or {}
ci_indicators = ["travis", "circleci", "appveyor", "azure", "github", "actions", "ci", "build"]
ci_urls = []
for key, url in project_urls.items():
if any(indicator in key.lower() or indicator in (url or "").lower() for indicator in ci_indicators):
ci_urls.append(key)
if ci_urls:
score += 25
indicators.append(f"CI/CD indicators ({len(ci_urls)} found)")
# Check description for testing mentions
description = (info.get("description", "") or "").lower()
testing_keywords = ["test", "pytest", "unittest", "nose", "coverage", "tox", "ci/cd", "continuous integration"]
testing_mentions = [kw for kw in testing_keywords if kw in description]
if testing_mentions:
score += 20
indicators.append(f"Testing framework mentions ({len(testing_mentions)} found)")
else:
issues.append("No testing framework mentions")
# Check for test dependencies (common patterns)
requires_dist = info.get("requires_dist", []) or []
test_deps = []
for req in requires_dist:
req_lower = req.lower()
if any(test_pkg in req_lower for test_pkg in ["pytest", "unittest", "nose", "coverage", "tox", "test"]):
test_deps.append(req.split()[0])
if test_deps:
score += 20
indicators.append(f"Test dependencies ({len(test_deps)} found)")
else:
issues.append("No test dependencies found")
# Check for badges (often indicate CI/testing)
badge_indicators = ["[![", "https://img.shields.io", "badge", "build status", "coverage"]
if any(indicator in description for indicator in badge_indicators):
score += 20
indicators.append("Status badges (likely CI integration)")
return {
"score": min(score, 100),
"indicators": indicators,
"issues": issues,
}
async def _assess_security_practices(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess security practices and indicators."""
info = package_data.get("info", {})
score = 0
practices = []
issues = []
# Check for security-related URLs
project_urls = info.get("project_urls", {}) or {}
security_urls = []
for key, url in project_urls.items():
if any(term in key.lower() for term in ["security", "vulnerability", "report", "bug"]):
security_urls.append(key)
if security_urls:
score += 25
practices.append(f"Security reporting channels ({len(security_urls)} found)")
else:
issues.append("No security reporting channels")
# Check for HTTPS URLs
https_urls = [url for url in project_urls.values() if (url or "").startswith("https://")]
if len(https_urls) == len([url for url in project_urls.values() if url]):
score += 15
practices.append("All URLs use HTTPS")
elif https_urls:
score += 10
practices.append("Some URLs use HTTPS")
else:
issues.append("No HTTPS URLs found")
# Check for security mentions in description
description = (info.get("description", "") or "").lower()
security_keywords = ["security", "secure", "vulnerability", "encryption", "authentication", "authorization"]
security_mentions = [kw for kw in security_keywords if kw in description]
if security_mentions:
score += 20
practices.append(f"Security awareness ({len(security_mentions)} mentions)")
# Check for license (security practice)
if info.get("license") or any("license" in c.lower() for c in info.get("classifiers", [])):
score += 15
practices.append("Clear license information")
else:
issues.append("No clear license information")
# Check for author/maintainer email (security contact)
if info.get("author_email") or info.get("maintainer_email"):
score += 10
practices.append("Maintainer contact information")
else:
issues.append("No maintainer contact information")
# Check for requirements specification (dependency security)
requires_dist = info.get("requires_dist", [])
if requires_dist:
# Check for version pinning (security practice)
pinned_deps = [req for req in requires_dist if any(op in req for op in ["==", ">=", "~="])]
if pinned_deps:
score += 15
practices.append(f"Version-pinned dependencies ({len(pinned_deps)}/{len(requires_dist)})")
else:
issues.append("No version-pinned dependencies")
return {
"score": min(score, 100),
"practices": practices,
"issues": issues,
}
async def _assess_compatibility_support(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess Python version and platform compatibility."""
info = package_data.get("info", {})
score = 0
support = []
issues = []
# Check Python version support from classifiers
classifiers = info.get("classifiers", [])
python_versions = []
for classifier in classifiers:
if "Programming Language :: Python ::" in classifier:
version_part = classifier.split("::")[-1].strip()
if re.match(r'^\d+\.\d+$', version_part): # Like "3.8", "3.9"
python_versions.append(version_part)
if len(python_versions) >= self.thresholds["python_versions_excellent"]:
score += 30
support.append(f"Excellent Python version support ({len(python_versions)} versions)")
elif len(python_versions) >= self.thresholds["python_versions_good"]:
score += 25
support.append(f"Good Python version support ({len(python_versions)} versions)")
elif len(python_versions) >= self.thresholds["python_versions_fair"]:
score += 15
support.append(f"Fair Python version support ({len(python_versions)} versions)")
elif python_versions:
score += 10
support.append(f"Limited Python version support ({len(python_versions)} versions)")
else:
issues.append("No explicit Python version support")
# Check requires_python specification
requires_python = info.get("requires_python")
if requires_python:
score += 20
support.append(f"Python requirement specified: {requires_python}")
else:
issues.append("No Python version requirement specified")
# Check platform support
platform_classifiers = [c for c in classifiers if "Operating System" in c]
if platform_classifiers:
if any("OS Independent" in c for c in platform_classifiers):
score += 20
support.append("Cross-platform support (OS Independent)")
else:
score += 15
support.append(f"Platform support ({len(platform_classifiers)} platforms)")
else:
issues.append("No platform support information")
# Check for wheel distribution (compatibility indicator)
urls = info.get("urls", []) or []
has_wheel = any(url.get("packagetype") == "bdist_wheel" for url in urls)
if has_wheel:
score += 15
support.append("Wheel distribution available")
else:
issues.append("No wheel distribution")
# Check development status
status_classifiers = [c for c in classifiers if "Development Status" in c]
if status_classifiers:
status = status_classifiers[0]
if "5 - Production/Stable" in status:
score += 15
support.append("Production/Stable status")
elif "4 - Beta" in status:
score += 10
support.append("Beta status")
elif "3 - Alpha" in status:
score += 5
support.append("Alpha status")
else:
issues.append(f"Early development status: {status}")
return {
"score": min(score, 100),
"support": support,
"issues": issues,
"python_versions": python_versions,
}
async def _assess_metadata_completeness(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Assess metadata completeness and quality."""
info = package_data.get("info", {})
score = 0
completeness = {}
# Essential fields
essential_fields = ["name", "version", "summary", "description", "author", "license"]
present_essential = [field for field in essential_fields if info.get(field)]
score += (len(present_essential) / len(essential_fields)) * 40
completeness["essential_fields"] = f"{len(present_essential)}/{len(essential_fields)}"
# Additional metadata fields
additional_fields = ["keywords", "home_page", "author_email", "classifiers", "project_urls"]
present_additional = [field for field in additional_fields if info.get(field)]
score += (len(present_additional) / len(additional_fields)) * 30
completeness["additional_fields"] = f"{len(present_additional)}/{len(additional_fields)}"
# Classifier completeness
classifiers = info.get("classifiers", [])
classifier_categories = set()
for classifier in classifiers:
category = classifier.split("::")[0].strip()
classifier_categories.add(category)
expected_categories = ["Development Status", "Intended Audience", "License", "Programming Language", "Topic"]
present_categories = [cat for cat in expected_categories if cat in classifier_categories]
score += (len(present_categories) / len(expected_categories)) * 20
completeness["classifier_categories"] = f"{len(present_categories)}/{len(expected_categories)}"
# URLs completeness
project_urls = info.get("project_urls", {}) or {}
expected_url_types = ["homepage", "repository", "documentation", "bug tracker"]
present_url_types = []
for expected in expected_url_types:
if any(expected.lower() in key.lower() for key in project_urls.keys()):
present_url_types.append(expected)
score += (len(present_url_types) / len(expected_url_types)) * 10
completeness["url_types"] = f"{len(present_url_types)}/{len(expected_url_types)}"
return {
"score": min(score, 100),
"completeness": completeness,
}
def _extract_github_url(self, package_data: Dict[str, Any]) -> Optional[str]:
"""Extract GitHub repository URL from package data."""
info = package_data.get("info", {})
# Check project URLs
project_urls = info.get("project_urls", {}) or {}
for url in project_urls.values():
if url and "github.com" in url:
return url
# Check home page
home_page = info.get("home_page", "")
if home_page and "github.com" in home_page:
return home_page
return None
async def _fetch_github_metrics(self, github_url: str) -> Dict[str, Any]:
"""Fetch GitHub repository metrics."""
try:
# Parse GitHub URL to get owner/repo
parsed = urlparse(github_url)
path_parts = parsed.path.strip('/').split('/')
if len(path_parts) >= 2:
owner, repo = path_parts[0], path_parts[1]
# GitHub API call (public API, no auth required for basic info)
api_url = f"https://api.github.com/repos/{owner}/{repo}"
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(
api_url,
headers={
"Accept": "application/vnd.github.v3+json",
"User-Agent": "PyPI-Health-Scorer/1.0"
}
)
if response.status_code == 200:
data = response.json()
return {
"stars": data.get("stargazers_count", 0),
"forks": data.get("forks_count", 0),
"watchers": data.get("watchers_count", 0),
"issues": data.get("open_issues_count", 0),
"has_wiki": data.get("has_wiki", False),
"has_pages": data.get("has_pages", False),
"language": data.get("language", ""),
"created_at": data.get("created_at", ""),
"updated_at": data.get("pushed_at", ""),
"default_branch": data.get("default_branch", ""),
"archived": data.get("archived", False),
"disabled": data.get("disabled", False),
}
else:
logger.warning(f"GitHub API returned status {response.status_code}")
except Exception as e:
logger.debug(f"Failed to fetch GitHub metrics: {e}")
return self._empty_github_metrics()
async def _empty_github_metrics(self) -> Dict[str, Any]:
"""Return empty GitHub metrics."""
return {
"stars": 0,
"forks": 0,
"watchers": 0,
"issues": 0,
"has_wiki": False,
"has_pages": False,
"language": "",
"created_at": "",
"updated_at": "",
"default_branch": "",
"archived": False,
"disabled": False,
"available": False,
}
def _calculate_health_level(self, score: float) -> str:
"""Calculate health level from score."""
if score >= 85:
return "excellent"
elif score >= 70:
return "good"
elif score >= 55:
return "fair"
elif score >= 40:
return "poor"
else:
return "critical"
def _identify_strengths(self, health_scores: Dict[str, float]) -> List[str]:
"""Identify package strengths."""
strengths = []
for category, score in health_scores.items():
if score >= 80:
strengths.append(f"Excellent {category} ({score:.0f}/100)")
elif score >= 65:
strengths.append(f"Good {category} ({score:.0f}/100)")
return strengths
def _identify_weaknesses(self, health_scores: Dict[str, float]) -> List[str]:
"""Identify package weaknesses."""
weaknesses = []
for category, score in health_scores.items():
if score < 40:
weaknesses.append(f"Poor {category} ({score:.0f}/100)")
elif score < 55:
weaknesses.append(f"Fair {category} ({score:.0f}/100)")
return weaknesses
def _prioritize_improvements(self, health_scores: Dict[str, float]) -> List[str]:
"""Prioritize improvement areas by weight and score."""
weighted_gaps = []
for category, score in health_scores.items():
gap = 100 - score
weighted_gap = gap * (self.weights[category] / 100)
weighted_gaps.append((category, weighted_gap, score))
# Sort by weighted gap (highest impact first)
weighted_gaps.sort(key=lambda x: x[1], reverse=True)
priorities = []
for category, weighted_gap, score in weighted_gaps[:3]: # Top 3
if weighted_gap > 5: # Only include significant gaps
priorities.append(f"Improve {category} (current: {score:.0f}/100, impact: {self.weights[category]}%)")
return priorities
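# Example of the weighted-gap ranking above (hypothetical scores): documentation at
# 40/100 with weight 15 has weighted gap (100 - 40) * 0.15 = 9.0, while metadata at
# 20/100 with weight 5 has (100 - 20) * 0.05 = 4.0, so documentation is prioritized
# first even though its raw score is higher.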
def _generate_health_recommendations(
self, health_scores: Dict[str, float], *assessment_results
) -> List[str]:
"""Generate actionable health improvement recommendations."""
recommendations = []
overall_score = sum(
health_scores[category] * (self.weights[category] / 100)
for category in health_scores
)
# Overall recommendations
if overall_score >= 85:
recommendations.append("🌟 Excellent package health - maintain current standards")
elif overall_score >= 70:
recommendations.append("✅ Good package health - minor improvements possible")
elif overall_score >= 55:
recommendations.append("⚠️ Fair package health - several areas need improvement")
elif overall_score >= 40:
recommendations.append("🔶 Poor package health - significant improvements needed")
else:
recommendations.append("🚨 Critical package health - major overhaul required")
# Specific recommendations based on low scores
if health_scores.get("maintenance", 0) < 60:
recommendations.append("📅 Improve maintenance: Update package more regularly, provide clear version history")
if health_scores.get("documentation", 0) < 60:
recommendations.append("📚 Improve documentation: Add comprehensive README, usage examples, and API docs")
if health_scores.get("testing", 0) < 60:
recommendations.append("🧪 Add testing: Implement test suite, CI/CD pipeline, and code coverage")
if health_scores.get("security", 0) < 60:
recommendations.append("🔒 Enhance security: Add security reporting, use HTTPS, specify dependencies properly")
if health_scores.get("compatibility", 0) < 60:
recommendations.append("🔧 Improve compatibility: Support more Python versions, add wheel distribution")
if health_scores.get("metadata", 0) < 60:
recommendations.append("📝 Complete metadata: Add missing package information, keywords, and classifiers")
if health_scores.get("popularity", 0) < 60:
recommendations.append("📈 Build community: Create documentation site, engage with users, add project URLs")
return recommendations
# Main health assessment functions
async def assess_pypi_package_health(
package_name: str,
version: Optional[str] = None,
include_github_metrics: bool = True
) -> Dict[str, Any]:
"""
Assess comprehensive health and quality of a PyPI package.
Args:
package_name: Name of the package to assess
version: Specific version to assess (optional)
include_github_metrics: Whether to fetch GitHub repository metrics
Returns:
Comprehensive health assessment including scores and recommendations
"""
scorer = PackageHealthScorer()
return await scorer.assess_package_health(
package_name, version, include_github_metrics
)
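# A minimal usage sketch for the function above (assumes this package is importable
# as pypi_query_mcp; "requests" is only an example package name):
#
#     import asyncio
#     from pypi_query_mcp.tools.health_scorer import assess_pypi_package_health
#
#     report = asyncio.run(assess_pypi_package_health("requests"))
#     print(report["overall_health"]["score"], report["overall_health"]["level"])
#     for tip in report["recommendations"]:
#         print(tip)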
async def compare_package_health(
package_names: List[str],
include_github_metrics: bool = False
) -> Dict[str, Any]:
"""
Compare health scores across multiple packages.
Args:
package_names: List of package names to compare
include_github_metrics: Whether to include GitHub metrics
Returns:
Comparative health analysis with rankings
"""
logger.info(f"Starting health comparison for {len(package_names)} packages")
scorer = PackageHealthScorer()
results = {}
# Assess packages in parallel batches
batch_size = 3
for i in range(0, len(package_names), batch_size):
batch = package_names[i:i + batch_size]
batch_tasks = [
scorer.assess_package_health(pkg_name, include_github_metrics=include_github_metrics)
for pkg_name in batch
]
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for pkg_name, result in zip(batch, batch_results):
if isinstance(result, Exception):
results[pkg_name] = {
"error": str(result),
"overall_health": {"score": 0, "level": "critical"},
"category_scores": {cat: 0 for cat in scorer.weights.keys()}
}
else:
results[pkg_name] = result
# Create comparison rankings
package_scores = [
(pkg, result.get("overall_health", {}).get("score", 0))
for pkg, result in results.items()
if "error" not in result
]
package_scores.sort(key=lambda x: x[1], reverse=True)
# Generate comparison insights
if package_scores:
best_package, best_score = package_scores[0]
worst_package, worst_score = package_scores[-1]
avg_score = sum(score for _, score in package_scores) / len(package_scores)
comparison_insights = {
"best_package": {"name": best_package, "score": best_score},
"worst_package": {"name": worst_package, "score": worst_score},
"average_score": round(avg_score, 2),
"score_range": best_score - worst_score,
"rankings": [{"package": pkg, "score": score, "rank": i+1}
for i, (pkg, score) in enumerate(package_scores)]
}
else:
comparison_insights = {
"best_package": None,
"worst_package": None,
"average_score": 0,
"score_range": 0,
"rankings": []
}
return {
"comparison_timestamp": datetime.now(timezone.utc).isoformat(),
"packages_compared": len(package_names),
"detailed_results": results,
"comparison_insights": comparison_insights,
"recommendations": _generate_comparison_recommendations(comparison_insights, results)
}
def _generate_comparison_recommendations(
insights: Dict[str, Any], results: Dict[str, Any]
) -> List[str]:
"""Generate recommendations for package comparison."""
recommendations = []
if not insights.get("rankings"):
recommendations.append("❌ No successful health assessments to compare")
return recommendations
best = insights.get("best_package")
worst = insights.get("worst_package")
avg_score = insights.get("average_score", 0)
if best and worst:
recommendations.append(
f"🥇 Best package: {best['name']} (score: {best['score']:.1f}/100)"
)
recommendations.append(
f"🥉 Needs improvement: {worst['name']} (score: {worst['score']:.1f}/100)"
)
if best['score'] - worst['score'] > 30:
recommendations.append("📊 Significant quality variation - consider standardizing practices")
recommendations.append(f"📈 Average health score: {avg_score:.1f}/100")
if avg_score >= 70:
recommendations.append("✅ Overall good package health across portfolio")
elif avg_score >= 55:
recommendations.append("⚠️ Mixed package health - focus on improving lower-scoring packages")
else:
recommendations.append("🚨 Poor overall package health - systematic improvements needed")
return recommendations


@@ -1,155 +0,0 @@
"""Package health assessment tools for PyPI packages."""
import logging
from typing import Any, Dict, List, Optional
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..tools.health_scorer import assess_pypi_package_health, compare_package_health
logger = logging.getLogger(__name__)
async def assess_package_health_score(
package_name: str,
version: Optional[str] = None,
include_github_metrics: bool = True
) -> Dict[str, Any]:
"""
Assess comprehensive health and quality of a PyPI package.
This tool evaluates package health across multiple dimensions including maintenance,
popularity, documentation, testing, security practices, compatibility, and metadata
completeness to provide an overall health score and actionable recommendations.
Args:
package_name: Name of the package to assess for health and quality
version: Specific version to assess (optional, defaults to latest version)
include_github_metrics: Whether to fetch GitHub repository metrics for analysis
Returns:
Dictionary containing comprehensive health assessment including:
- Overall health score (0-100) and level (excellent/good/fair/poor/critical)
- Category-specific scores (maintenance, popularity, documentation, testing, etc.)
- Detailed assessment breakdown with indicators and issues for each category
- GitHub repository metrics (stars, forks, activity) if available
- Actionable recommendations for health improvements
- Strengths, weaknesses, and improvement priorities analysis
Raises:
InvalidPackageNameError: If package name is empty or invalid
PackageNotFoundError: If package is not found on PyPI
NetworkError: For network-related errors
SearchError: If health assessment fails
"""
if not package_name or not package_name.strip():
raise InvalidPackageNameError(package_name)
logger.info(f"MCP tool: Assessing health for package {package_name}")
try:
result = await assess_pypi_package_health(
package_name=package_name,
version=version,
include_github_metrics=include_github_metrics
)
overall_score = result.get("overall_health", {}).get("score", 0)
health_level = result.get("overall_health", {}).get("level", "unknown")
logger.info(f"MCP tool: Health assessment completed for {package_name} - score: {overall_score:.1f}/100 ({health_level})")
return result
except (InvalidPackageNameError, NetworkError, SearchError) as e:
logger.error(f"Error assessing health for {package_name}: {e}")
return {
"error": f"Health assessment failed: {e}",
"error_type": type(e).__name__,
"package": package_name,
"version": version,
"assessment_timestamp": "",
"overall_health": {
"score": 0,
"level": "critical",
"max_score": 100,
},
"category_scores": {
"maintenance": 0,
"popularity": 0,
"documentation": 0,
"testing": 0,
"security": 0,
"compatibility": 0,
"metadata": 0,
},
"detailed_assessment": {},
"recommendations": [f"❌ Health assessment failed: {e}"],
"health_summary": {
"strengths": [],
"weaknesses": ["Assessment failure"],
"improvement_priority": ["Resolve package access issues"],
}
}
async def compare_packages_health_scores(
package_names: List[str],
include_github_metrics: bool = False
) -> Dict[str, Any]:
"""
Compare health scores across multiple PyPI packages.
This tool performs comparative health analysis across multiple packages,
providing rankings, insights, and recommendations to help evaluate
package ecosystem quality and identify the best options.
Args:
package_names: List of package names to compare for health and quality
include_github_metrics: Whether to include GitHub metrics in the comparison
Returns:
Dictionary containing comparative health analysis including:
- Detailed health results for each package
- Health score rankings with best/worst package identification
- Comparison insights (average scores, score ranges, rankings)
- Recommendations for package selection and improvements
- Statistical analysis of health across the package set
Raises:
ValueError: If package_names list is empty
NetworkError: For network-related errors during analysis
SearchError: If health comparison fails
"""
if not package_names:
raise ValueError("Package names list cannot be empty")
logger.info(f"MCP tool: Starting health comparison for {len(package_names)} packages")
try:
result = await compare_package_health(
package_names=package_names,
include_github_metrics=include_github_metrics
)
comparison_insights = result.get("comparison_insights", {})
best_package = comparison_insights.get("best_package", {})
packages_compared = result.get("packages_compared", 0)
logger.info(f"MCP tool: Health comparison completed for {packages_compared} packages - best: {best_package.get('name', 'unknown')} ({best_package.get('score', 0):.1f}/100)")
return result
except (ValueError, NetworkError, SearchError) as e:
logger.error(f"Error in health comparison: {e}")
return {
"error": f"Health comparison failed: {e}",
"error_type": type(e).__name__,
"comparison_timestamp": "",
"packages_compared": len(package_names),
"detailed_results": {},
"comparison_insights": {
"best_package": None,
"worst_package": None,
"average_score": 0,
"score_range": 0,
"rankings": []
},
"recommendations": [f"❌ Health comparison failed: {e}"]
}
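# Minimal usage sketch (not part of the original file): run this module's tools directly. The
# package names are arbitrary examples and both calls reach PyPI over the network.
if __name__ == "__main__":
    import asyncio
    import json

    async def _demo() -> None:
        single = await assess_package_health_score("requests", include_github_metrics=False)
        print(json.dumps(single.get("overall_health", {}), indent=2))
        compared = await compare_packages_health_scores(["requests", "httpx"])
        print(json.dumps(compared.get("comparison_insights", {}), indent=2))

    asyncio.run(_demo())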

View File

@ -1,727 +0,0 @@
"""License compatibility analysis tools for PyPI packages."""
import asyncio
import logging
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Set, Tuple
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..core.pypi_client import PyPIClient
logger = logging.getLogger(__name__)
class LicenseCompatibilityAnalyzer:
"""Comprehensive license compatibility analyzer for PyPI packages."""
def __init__(self):
self.timeout = 30.0
# License compatibility matrix based on common license interactions
# Key: primary license, Value: dict of compatible licenses with compatibility level
self.compatibility_matrix = {
"MIT": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "one-way", # MIT can be used in GPL, not vice versa
"GPL-3.0": "one-way",
"LGPL-2.1": "compatible",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
"BSD": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "one-way",
"GPL-3.0": "one-way",
"LGPL-2.1": "compatible",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
"Apache-2.0": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "incompatible", # Patent clause conflicts
"GPL-3.0": "one-way", # Apache can go into GPL-3.0
"LGPL-2.1": "review-required",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
"GPL-2.0": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "incompatible",
"ISC": "compatible",
"GPL-2.0": "compatible",
"GPL-3.0": "incompatible", # GPL-2.0 and GPL-3.0 are incompatible
"LGPL-2.1": "compatible",
"LGPL-3.0": "incompatible",
"MPL-2.0": "incompatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "incompatible",
},
"GPL-3.0": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "incompatible",
"GPL-3.0": "compatible",
"LGPL-2.1": "review-required",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "incompatible",
},
"LGPL-2.1": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "review-required",
"ISC": "compatible",
"GPL-2.0": "compatible",
"GPL-3.0": "review-required",
"LGPL-2.1": "compatible",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
"LGPL-3.0": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "incompatible",
"GPL-3.0": "compatible",
"LGPL-2.1": "compatible",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
"MPL-2.0": {
"MIT": "compatible",
"BSD": "compatible",
"Apache-2.0": "compatible",
"ISC": "compatible",
"GPL-2.0": "incompatible",
"GPL-3.0": "compatible",
"LGPL-2.1": "compatible",
"LGPL-3.0": "compatible",
"MPL-2.0": "compatible",
"Unlicense": "compatible",
"Public Domain": "compatible",
"Proprietary": "review-required",
},
}
# License categorization for easier analysis
self.license_categories = {
"permissive": ["MIT", "BSD", "Apache-2.0", "ISC", "Unlicense", "Public Domain"],
"copyleft_weak": ["LGPL-2.1", "LGPL-3.0", "MPL-2.0"],
"copyleft_strong": ["GPL-2.0", "GPL-3.0", "AGPL-3.0"],
"proprietary": ["Proprietary", "Commercial", "All Rights Reserved"],
"unknown": ["Unknown", "Other", "Custom"],
}
# Common license normalization patterns
self.license_patterns = {
r"MIT\s*License": "MIT",
r"BSD\s*3[-\s]*Clause": "BSD",
r"BSD\s*2[-\s]*Clause": "BSD",
r"Apache\s*2\.0": "Apache-2.0",
r"Apache\s*License\s*2\.0": "Apache-2.0",
r"GNU\s*General\s*Public\s*License\s*v?2": "GPL-2.0",
r"GNU\s*General\s*Public\s*License\s*v?3": "GPL-3.0",
r"GNU\s*Lesser\s*General\s*Public\s*License\s*v?2": "LGPL-2.1",
r"GNU\s*Lesser\s*General\s*Public\s*License\s*v?3": "LGPL-3.0",
r"Mozilla\s*Public\s*License\s*2\.0": "MPL-2.0",
r"ISC\s*License": "ISC",
r"Unlicense": "Unlicense",
r"Public\s*Domain": "Public Domain",
}
async def analyze_package_license(
self,
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True
) -> Dict[str, Any]:
"""
Analyze license information for a PyPI package.
Args:
package_name: Name of the package to analyze
version: Specific version to analyze (optional)
include_dependencies: Whether to analyze dependency licenses
Returns:
Dictionary containing license analysis results
"""
logger.info(f"Starting license analysis for package: {package_name}")
try:
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name, version)
package_version = version or package_data["info"]["version"]
# Analyze package license
license_info = self._extract_license_info(package_data)
# Analyze dependencies if requested
dependency_licenses = []
if include_dependencies:
dependency_licenses = await self._analyze_dependency_licenses(
package_name, package_version
)
# Generate compatibility analysis
compatibility_analysis = self._analyze_license_compatibility(
license_info, dependency_licenses
)
# Calculate risk assessment
risk_assessment = self._assess_license_risks(
license_info, dependency_licenses, compatibility_analysis
)
return {
"package": package_name,
"version": package_version,
"analysis_timestamp": datetime.now(timezone.utc).isoformat(),
"license_info": license_info,
"dependency_licenses": dependency_licenses,
"compatibility_analysis": compatibility_analysis,
"risk_assessment": risk_assessment,
"recommendations": self._generate_license_recommendations(
license_info, dependency_licenses, compatibility_analysis, risk_assessment
),
"analysis_summary": {
"total_dependencies_analyzed": len(dependency_licenses),
"unique_licenses_found": len(set(
[license_info.get("normalized_license", "Unknown")] +
[dep.get("normalized_license", "Unknown") for dep in dependency_licenses]
)),
"license_conflicts": len(compatibility_analysis.get("conflicts", [])),
"review_required_count": len(compatibility_analysis.get("review_required", [])),
}
}
except Exception as e:
logger.error(f"License analysis failed for {package_name}: {e}")
raise SearchError(f"License analysis failed: {e}") from e
def _extract_license_info(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract and normalize license information from package data."""
info = package_data.get("info", {})
# Extract license from multiple sources
license_field = info.get("license", "")
license_classifier = self._extract_license_from_classifiers(
info.get("classifiers", [])
)
# Normalize license
normalized_license = self._normalize_license(license_field or license_classifier)
# Categorize license
license_category = self._categorize_license(normalized_license)
return {
"raw_license": license_field,
"classifier_license": license_classifier,
"normalized_license": normalized_license,
"license_category": license_category,
"license_url": self._extract_license_url(info),
"license_confidence": self._assess_license_confidence(
license_field, license_classifier, normalized_license
),
}
def _extract_license_from_classifiers(self, classifiers: List[str]) -> str:
"""Extract license information from PyPI classifiers."""
license_classifiers = [
c for c in classifiers if c.startswith("License ::")
]
if not license_classifiers:
return ""
# Return the most specific license classifier
return license_classifiers[-1].split("::")[-1].strip()
def _normalize_license(self, license_text: str) -> str:
"""Normalize license text to standard SPDX identifiers."""
if not license_text:
return "Unknown"
license_text_clean = license_text.strip()
# Check for exact matches first
common_licenses = {
"MIT": "MIT",
"BSD": "BSD",
"Apache": "Apache-2.0",
"GPL": "GPL-3.0", # Default to GPL-3.0 if version unspecified
"LGPL": "LGPL-3.0",
"MPL": "MPL-2.0",
}
if license_text_clean in common_licenses:
return common_licenses[license_text_clean]
# Pattern matching
for pattern, normalized in self.license_patterns.items():
if re.search(pattern, license_text_clean, re.IGNORECASE):
return normalized
# Check if it contains known license names
license_lower = license_text_clean.lower()
if "mit" in license_lower:
return "MIT"
elif "bsd" in license_lower:
return "BSD"
elif "apache" in license_lower:
return "Apache-2.0"
elif "gpl" in license_lower and "lgpl" not in license_lower:
return "GPL-3.0"
elif "lgpl" in license_lower:
return "LGPL-3.0"
elif "mozilla" in license_lower or "mpl" in license_lower:
return "MPL-2.0"
elif "unlicense" in license_lower:
return "Unlicense"
elif "public domain" in license_lower:
return "Public Domain"
elif any(prop in license_lower for prop in ["proprietary", "commercial", "all rights reserved"]):
return "Proprietary"
return "Other"
def _categorize_license(self, normalized_license: str) -> str:
"""Categorize license into major categories."""
for category, licenses in self.license_categories.items():
if normalized_license in licenses:
return category
return "unknown"
def _extract_license_url(self, info: Dict[str, Any]) -> str:
"""Extract license URL from package info."""
# Check project URLs
project_urls = info.get("project_urls", {}) or {}
for key, url in project_urls.items():
if "license" in key.lower():
return url
# Check home page for license info
home_page = info.get("home_page", "")
if home_page and "github.com" in home_page:
return f"{home_page.rstrip('/')}/blob/main/LICENSE"
return ""
def _assess_license_confidence(
self, raw_license: str, classifier_license: str, normalized_license: str
) -> str:
"""Assess confidence level in license detection."""
if not raw_license and not classifier_license:
return "low"
if normalized_license == "Unknown" or normalized_license == "Other":
return "low"
if raw_license and classifier_license and raw_license in classifier_license:
return "high"
elif raw_license or classifier_license:
return "medium"
else:
return "low"
async def _analyze_dependency_licenses(
self, package_name: str, version: str
) -> List[Dict[str, Any]]:
"""Analyze licenses of package dependencies."""
try:
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name, version)
# Extract dependencies
requires_dist = package_data.get("info", {}).get("requires_dist", []) or []
dependencies = []
for req in requires_dist:
# Parse the bare dependency name (simplified): drop environment markers, extras, and
# version specifiers, and skip requirements that only apply to optional extras
base = req.split(";")[0].strip()
dep_name = re.split(r"[\s\[<>=!~(]", base, maxsplit=1)[0]
if dep_name and "extra ==" not in req:
dependencies.append(dep_name)
# Analyze dependency licenses (limit to the first 15 to avoid overwhelming PyPI)
dependency_licenses = []
for dep_name in dependencies[:15]:
try:
dep_data = await client.get_package_info(dep_name)
dep_license_info = self._extract_license_info(dep_data)
dependency_licenses.append({
"package": dep_name,
"version": dep_data.get("info", {}).get("version", ""),
**dep_license_info
})
except Exception as e:
logger.debug(f"Failed to analyze license for dependency {dep_name}: {e}")
dependency_licenses.append({
"package": dep_name,
"version": "",
"normalized_license": "Unknown",
"license_category": "unknown",
"license_confidence": "low",
"error": str(e)
})
return dependency_licenses
except Exception as e:
logger.warning(f"Dependency license analysis failed: {e}")
return []
def _analyze_license_compatibility(
self, package_license: Dict[str, Any], dependency_licenses: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""Analyze license compatibility between package and its dependencies."""
main_license = package_license.get("normalized_license", "Unknown")
compatible = []
incompatible = []
review_required = []
one_way = []
unknown = []
for dep in dependency_licenses:
dep_license = dep.get("normalized_license", "Unknown")
dep_package = dep.get("package", "unknown")
if main_license == "Unknown" or dep_license == "Unknown":
unknown.append({
"package": dep_package,
"license": dep_license,
"reason": "License information unavailable"
})
continue
compatibility = self._check_license_compatibility(main_license, dep_license)
if compatibility == "compatible":
compatible.append({
"package": dep_package,
"license": dep_license,
})
elif compatibility == "incompatible":
incompatible.append({
"package": dep_package,
"license": dep_license,
"reason": f"{main_license} and {dep_license} are incompatible"
})
elif compatibility == "review-required":
review_required.append({
"package": dep_package,
"license": dep_license,
"reason": f"Manual review needed for {main_license} + {dep_license}"
})
elif compatibility == "one-way":
one_way.append({
"package": dep_package,
"license": dep_license,
"reason": f"{dep_license} can be used in {main_license} project"
})
return {
"main_license": main_license,
"compatible": compatible,
"incompatible": incompatible,
"review_required": review_required,
"one_way": one_way,
"unknown": unknown,
"conflicts": incompatible, # Alias for easier access
}
def _check_license_compatibility(self, license1: str, license2: str) -> str:
"""Check compatibility between two licenses."""
if license1 in self.compatibility_matrix:
return self.compatibility_matrix[license1].get(license2, "unknown")
# Fallback compatibility rules
if license1 == license2:
return "compatible"
# Default to review required for unknown combinations
return "review-required"
def _assess_license_risks(
self,
package_license: Dict[str, Any],
dependency_licenses: List[Dict[str, Any]],
compatibility_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""Assess overall license risks for the project."""
risks = []
risk_score = 0
main_license = package_license.get("normalized_license", "Unknown")
main_category = package_license.get("license_category", "unknown")
# Check for incompatible licenses
incompatible_count = len(compatibility_analysis.get("incompatible", []))
if incompatible_count > 0:
risks.append(f"Found {incompatible_count} incompatible license(s)")
risk_score += incompatible_count * 30
# Check for unknown licenses
unknown_count = len(compatibility_analysis.get("unknown", []))
if unknown_count > 0:
risks.append(f"Found {unknown_count} dependency(ies) with unknown licenses")
risk_score += unknown_count * 10
# Check for review-required licenses
review_count = len(compatibility_analysis.get("review_required", []))
if review_count > 0:
risks.append(f"Found {review_count} license(s) requiring manual review")
risk_score += review_count * 15
# Check for copyleft contamination risk
if main_category == "permissive":
copyleft_deps = [
dep for dep in dependency_licenses
if dep.get("license_category") in ["copyleft_weak", "copyleft_strong"]
]
if copyleft_deps:
risks.append(f"Permissive project using {len(copyleft_deps)} copyleft dependencies")
risk_score += len(copyleft_deps) * 20
# Check for proprietary license risks
proprietary_deps = [
dep for dep in dependency_licenses
if dep.get("license_category") == "proprietary"
]
if proprietary_deps:
risks.append(f"Found {len(proprietary_deps)} proprietary dependencies")
risk_score += len(proprietary_deps) * 25
# Calculate risk level
if risk_score >= 80:
risk_level = "critical"
elif risk_score >= 50:
risk_level = "high"
elif risk_score >= 25:
risk_level = "medium"
elif risk_score > 0:
risk_level = "low"
else:
risk_level = "minimal"
return {
"risk_score": min(risk_score, 100),
"risk_level": risk_level,
"risk_factors": risks,
"compliance_status": "compliant" if risk_score < 25 else "review-needed",
}
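# Worked example (not in the original source): one incompatible dependency (+30) plus two
# review-required dependencies (+2*15) gives a risk_score of 60, which maps to "high" risk, and
# because 60 >= 25 the compliance_status is reported as "review-needed".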
def _generate_license_recommendations(
self,
package_license: Dict[str, Any],
dependency_licenses: List[Dict[str, Any]],
compatibility_analysis: Dict[str, Any],
risk_assessment: Dict[str, Any]
) -> List[str]:
"""Generate actionable license recommendations."""
recommendations = []
main_license = package_license.get("normalized_license", "Unknown")
risk_level = risk_assessment.get("risk_level", "unknown")
# High-level recommendations based on risk
if risk_level == "critical":
recommendations.append("🚨 Critical license issues detected - immediate legal review required")
elif risk_level == "high":
recommendations.append("⚠️ High license risk - review and resolve conflicts before release")
elif risk_level == "medium":
recommendations.append("⚠️ Moderate license risk - review recommendations below")
elif risk_level == "minimal":
recommendations.append("✅ License compatibility appears good")
# Specific recommendations for incompatible licenses
incompatible = compatibility_analysis.get("incompatible", [])
if incompatible:
recommendations.append(f"🔴 Remove or replace {len(incompatible)} incompatible dependencies:")
for dep in incompatible[:3]: # Show first 3
recommendations.append(f" - {dep['package']} ({dep['license']}): {dep.get('reason', '')}")
# Recommendations for review-required licenses
review_required = compatibility_analysis.get("review_required", [])
if review_required:
recommendations.append(f"📋 Manual review needed for {len(review_required)} dependencies:")
for dep in review_required[:3]:
recommendations.append(f" - {dep['package']} ({dep['license']})")
# Unknown license recommendations
unknown = compatibility_analysis.get("unknown", [])
if unknown:
recommendations.append(f"❓ Investigate {len(unknown)} dependencies with unknown licenses")
# License confidence recommendations
if package_license.get("license_confidence") == "low":
recommendations.append("📝 Consider adding clear license information to your package")
# Category-specific recommendations
main_category = package_license.get("license_category", "unknown")
if main_category == "copyleft_strong":
recommendations.append(" GPL license requires derivative works to also be GPL")
elif main_category == "permissive":
recommendations.append(" Permissive license allows flexible usage")
return recommendations
# Main analysis functions
async def analyze_package_license_compatibility(
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True
) -> Dict[str, Any]:
"""
Analyze license compatibility for a PyPI package.
Args:
package_name: Name of the package to analyze
version: Specific version to analyze (optional)
include_dependencies: Whether to analyze dependency licenses
Returns:
Comprehensive license compatibility analysis
"""
analyzer = LicenseCompatibilityAnalyzer()
return await analyzer.analyze_package_license(
package_name, version, include_dependencies
)
async def check_license_compliance_bulk(
package_names: List[str],
target_license: Optional[str] = None
) -> Dict[str, Any]:
"""
Check license compliance for multiple packages.
Args:
package_names: List of package names to check
target_license: Target license for compatibility checking
Returns:
Bulk license compliance report
"""
logger.info(f"Starting bulk license compliance check for {len(package_names)} packages")
analyzer = LicenseCompatibilityAnalyzer()
results = {}
summary = {
"total_packages": len(package_names),
"compliant_packages": 0,
"non_compliant_packages": 0,
"unknown_license_packages": 0,
"high_risk_packages": [],
"analysis_timestamp": datetime.now(timezone.utc).isoformat()
}
# Analyze packages in parallel batches
batch_size = 5
for i in range(0, len(package_names), batch_size):
batch = package_names[i:i + batch_size]
batch_tasks = [
analyzer.analyze_package_license(pkg_name, include_dependencies=False)
for pkg_name in batch
]
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for pkg_name, result in zip(batch, batch_results):
if isinstance(result, Exception):
results[pkg_name] = {
"error": str(result),
"analysis_status": "failed"
}
summary["unknown_license_packages"] += 1
else:
results[pkg_name] = result
# Update summary
risk_level = result.get("risk_assessment", {}).get("risk_level", "unknown")
if risk_level in ["minimal", "low"]:
summary["compliant_packages"] += 1
else:
summary["non_compliant_packages"] += 1
if risk_level in ["high", "critical"]:
summary["high_risk_packages"].append({
"package": pkg_name,
"license": result.get("license_info", {}).get("normalized_license", "Unknown"),
"risk_level": risk_level
})
return {
"summary": summary,
"detailed_results": results,
"target_license": target_license,
"recommendations": _generate_bulk_license_recommendations(summary, results)
}
def _generate_bulk_license_recommendations(summary: Dict[str, Any], results: Dict[str, Any]) -> List[str]:
"""Generate recommendations for bulk license analysis."""
recommendations = []
compliant = summary["compliant_packages"]
total = summary["total_packages"]
if compliant == total:
recommendations.append("✅ All packages appear to have compliant licenses")
else:
non_compliant = summary["non_compliant_packages"]
percentage = (non_compliant / total) * 100
recommendations.append(
f"⚠️ {non_compliant}/{total} packages ({percentage:.1f}%) have license compliance issues"
)
high_risk = summary["high_risk_packages"]
if high_risk:
recommendations.append(
f"🚨 {len(high_risk)} packages are high risk: {', '.join([p['package'] for p in high_risk])}"
)
recommendations.append("Priority: Address high-risk packages immediately")
unknown = summary["unknown_license_packages"]
if unknown > 0:
recommendations.append(f"{unknown} packages have unknown or unclear licenses")
recommendations.append("Consider investigating these packages for license clarity")
return recommendations
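# Minimal usage sketch (not part of the original module): the package names below are arbitrary
# examples and both calls require network access to PyPI.
if __name__ == "__main__":
    import json

    async def _demo() -> None:
        single = await analyze_package_license_compatibility("requests", include_dependencies=False)
        print(json.dumps(single.get("license_info", {}), indent=2))
        bulk = await check_license_compliance_bulk(["requests", "httpx"], target_license="MIT")
        print(json.dumps(bulk.get("summary", {}), indent=2))

    asyncio.run(_demo())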

View File

@ -1,154 +0,0 @@
"""License compatibility analysis tools for PyPI packages."""
import logging
from typing import Any, Dict, List, Optional
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..tools.license_analyzer import analyze_package_license_compatibility, check_license_compliance_bulk
logger = logging.getLogger(__name__)
async def analyze_pypi_package_license(
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True
) -> Dict[str, Any]:
"""
Analyze license compatibility for a PyPI package.
This tool provides comprehensive license analysis including license identification,
dependency license scanning, compatibility checking, and risk assessment to help
ensure your project complies with open source license requirements.
Args:
package_name: Name of the package to analyze for license compatibility
version: Specific version to analyze (optional, defaults to latest version)
include_dependencies: Whether to analyze dependency licenses for compatibility
Returns:
Dictionary containing comprehensive license analysis including:
- License identification and normalization (SPDX format)
- License categorization (permissive, copyleft, proprietary, etc.)
- Dependency license analysis and compatibility matrix
- Risk assessment with score and risk level (minimal, low, medium, high, critical)
- Compatibility analysis highlighting conflicts and review-required combinations
- Actionable recommendations for license compliance
Raises:
InvalidPackageNameError: If package name is empty or invalid
PackageNotFoundError: If package is not found on PyPI
NetworkError: For network-related errors
SearchError: If license analysis fails
"""
if not package_name or not package_name.strip():
raise InvalidPackageNameError(package_name)
logger.info(f"MCP tool: Analyzing license compatibility for package {package_name}")
try:
result = await analyze_package_license_compatibility(
package_name=package_name,
version=version,
include_dependencies=include_dependencies
)
logger.info(f"MCP tool: License analysis completed for {package_name} - {result.get('analysis_summary', {}).get('license_conflicts', 0)} conflicts found")
return result
except (InvalidPackageNameError, NetworkError, SearchError) as e:
logger.error(f"Error analyzing license for {package_name}: {e}")
return {
"error": f"License analysis failed: {e}",
"error_type": type(e).__name__,
"package": package_name,
"version": version,
"analysis_timestamp": "",
"license_info": {
"normalized_license": "Unknown",
"license_category": "unknown",
"license_confidence": "low",
},
"dependency_licenses": [],
"compatibility_analysis": {
"main_license": "Unknown",
"compatible": [],
"incompatible": [],
"review_required": [],
"conflicts": [],
},
"risk_assessment": {
"risk_score": 100,
"risk_level": "critical",
"risk_factors": [f"License analysis failed: {e}"],
"compliance_status": "unknown",
},
"recommendations": [f"❌ License analysis failed: {e}"],
"analysis_summary": {
"total_dependencies_analyzed": 0,
"unique_licenses_found": 0,
"license_conflicts": 0,
"review_required_count": 0,
}
}
async def check_bulk_license_compliance(
package_names: List[str],
target_license: Optional[str] = None
) -> Dict[str, Any]:
"""
Check license compliance for multiple PyPI packages.
This tool performs bulk license compliance checking across multiple packages,
providing a consolidated report to help ensure your entire package ecosystem
complies with license requirements and identifying potential legal risks.
Args:
package_names: List of package names to check for license compliance
target_license: Target license for compatibility checking (optional)
Returns:
Dictionary containing bulk compliance analysis including:
- Summary statistics (total packages, compliant/non-compliant counts)
- Detailed license analysis for each package
- High-risk packages requiring immediate attention
- Unknown license packages needing investigation
- Prioritized recommendations for compliance remediation
Raises:
ValueError: If package_names list is empty
NetworkError: For network-related errors during analysis
SearchError: If bulk compliance checking fails
"""
if not package_names:
raise ValueError("Package names list cannot be empty")
logger.info(f"MCP tool: Starting bulk license compliance check for {len(package_names)} packages")
try:
result = await check_license_compliance_bulk(
package_names=package_names,
target_license=target_license
)
logger.info(f"MCP tool: Bulk license compliance completed - {result.get('summary', {}).get('non_compliant_packages', 0)} non-compliant packages found")
return result
except (ValueError, NetworkError, SearchError) as e:
logger.error(f"Error in bulk license compliance check: {e}")
return {
"error": f"Bulk license compliance check failed: {e}",
"error_type": type(e).__name__,
"summary": {
"total_packages": len(package_names),
"compliant_packages": 0,
"non_compliant_packages": 0,
"unknown_license_packages": len(package_names),
"high_risk_packages": [],
"analysis_timestamp": ""
},
"detailed_results": {},
"target_license": target_license,
"recommendations": [f"❌ Bulk license compliance check failed: {e}"]
}
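# Minimal usage sketch (not part of the original file): package names are arbitrary examples and
# the calls reach PyPI over the network.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        report = await analyze_pypi_package_license("flask", include_dependencies=True)
        print(report.get("risk_assessment", {}))
        compliance = await check_bulk_license_compliance(["flask", "sqlalchemy"])
        print(compliance.get("summary", {}))

    asyncio.run(_demo())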

View File

@ -1,947 +0,0 @@
"""Requirements file parsing and analysis tools for Python projects."""
import asyncio
import logging
import re
try:
import tomllib  # standard library on Python 3.11+
except ModuleNotFoundError:
# Python 3.10 has no stdlib tomllib; assume the tomli backport is installed as a fallback
import tomli as tomllib
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..core.pypi_client import PyPIClient
logger = logging.getLogger(__name__)
class RequirementsAnalyzer:
"""Comprehensive requirements file analyzer for Python projects."""
def __init__(self):
self.timeout = 30.0
# Supported requirement file patterns
self.requirement_patterns = {
"requirements.txt": r"requirements.*\.txt",
"pyproject.toml": r"pyproject\.toml",
"setup.py": r"setup\.py",
"Pipfile": r"Pipfile",
"poetry.lock": r"poetry\.lock",
"conda.yml": r"(conda|environment)\.ya?ml",
}
# Version specifier patterns
self.version_patterns = {
"exact": r"==\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"gte": r">=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"gt": r">\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"lte": r"<=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"lt": r"<\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"compatible": r"~=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
"not_equal": r"!=\s*([0-9]+(?:\.[0-9]+)*(?:[a-zA-Z][0-9]*)?)",
}
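# Illustrative note (not in the original source): each pattern above captures the version portion
# of a specifier, e.g. re.search(self.version_patterns["gte"], "requests>=2.28.1").group(1)
# returns "2.28.1", and the "compatible" pattern does the same for "~=1.4" -> "1.4".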
async def analyze_requirements_file(
self,
file_path: str,
check_updates: bool = True,
security_scan: bool = True,
compatibility_check: bool = True
) -> Dict[str, Any]:
"""
Analyze a requirements file for dependencies, versions, security, and compatibility.
Args:
file_path: Path to the requirements file
check_updates: Whether to check for package updates
security_scan: Whether to perform security vulnerability scanning
compatibility_check: Whether to check Python version compatibility
Returns:
Dictionary containing comprehensive requirements analysis
"""
logger.info(f"Starting requirements analysis for: {file_path}")
try:
# Parse requirements file
parsed_requirements = await self._parse_requirements_file(file_path)
if not parsed_requirements["dependencies"]:
return {
"file_path": file_path,
"analysis_timestamp": datetime.now(timezone.utc).isoformat(),
"file_info": parsed_requirements["file_info"],
"dependencies": [],
"analysis_summary": {
"total_dependencies": 0,
"outdated_packages": 0,
"security_vulnerabilities": 0,
"compatibility_issues": 0,
},
"recommendations": ["No dependencies found to analyze"],
"error": "No dependencies found in requirements file"
}
# Analyze dependencies in parallel
analysis_tasks = []
# Basic dependency analysis (always done)
analysis_tasks.append(self._analyze_dependency_health(parsed_requirements["dependencies"]))
# Optional analyses
if check_updates:
analysis_tasks.append(self._check_package_updates(parsed_requirements["dependencies"]))
else:
analysis_tasks.append(asyncio.create_task(self._empty_updates_result()))
if security_scan:
analysis_tasks.append(self._scan_dependencies_security(parsed_requirements["dependencies"]))
else:
analysis_tasks.append(asyncio.create_task(self._empty_security_result()))
if compatibility_check:
python_version = parsed_requirements.get("python_version")
analysis_tasks.append(self._check_dependencies_compatibility(parsed_requirements["dependencies"], python_version))
else:
analysis_tasks.append(asyncio.create_task(self._empty_compatibility_result()))
# Execute analyses
results = await asyncio.gather(*analysis_tasks, return_exceptions=True)
# Unpack results
health_analysis = results[0] if not isinstance(results[0], Exception) else {"healthy": [], "issues": []}
update_analysis = results[1] if not isinstance(results[1], Exception) else {"outdated": [], "current": []}
security_analysis = results[2] if not isinstance(results[2], Exception) else {"vulnerabilities": [], "secure": []}
compatibility_analysis = results[3] if not isinstance(results[3], Exception) else {"compatible": [], "incompatible": []}
# Generate comprehensive analysis
analysis_summary = self._generate_analysis_summary(
parsed_requirements["dependencies"],
health_analysis,
update_analysis,
security_analysis,
compatibility_analysis
)
recommendations = self._generate_requirements_recommendations(
parsed_requirements,
health_analysis,
update_analysis,
security_analysis,
compatibility_analysis,
analysis_summary
)
return {
"file_path": file_path,
"analysis_timestamp": datetime.now(timezone.utc).isoformat(),
"file_info": parsed_requirements["file_info"],
"dependencies": parsed_requirements["dependencies"],
"dependency_analysis": {
"health": health_analysis,
"updates": update_analysis if check_updates else None,
"security": security_analysis if security_scan else None,
"compatibility": compatibility_analysis if compatibility_check else None,
},
"analysis_summary": analysis_summary,
"recommendations": recommendations,
"python_requirements": parsed_requirements.get("python_version"),
}
except Exception as e:
logger.error(f"Requirements analysis failed for {file_path}: {e}")
raise SearchError(f"Requirements analysis failed: {e}") from e
async def _parse_requirements_file(self, file_path: str) -> Dict[str, Any]:
"""Parse requirements from various file formats."""
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"Requirements file not found: {file_path}")
file_info = {
"name": path.name,
"format": self._detect_file_format(path.name),
"size_bytes": path.stat().st_size,
"modified_time": datetime.fromtimestamp(path.stat().st_mtime, timezone.utc).isoformat(),
}
# Parse based on file format
if path.name.endswith('.txt'):
dependencies, python_version = await self._parse_requirements_txt(path)
elif path.name == 'pyproject.toml':
dependencies, python_version = await self._parse_pyproject_toml(path)
elif path.name == 'setup.py':
dependencies, python_version = await self._parse_setup_py(path)
elif path.name == 'Pipfile':
dependencies, python_version = await self._parse_pipfile(path)
elif path.name.endswith('.yml') or path.name.endswith('.yaml'):
dependencies, python_version = await self._parse_conda_yml(path)
else:
# Try to parse as requirements.txt format
dependencies, python_version = await self._parse_requirements_txt(path)
return {
"file_info": file_info,
"dependencies": dependencies,
"python_version": python_version,
}
def _detect_file_format(self, filename: str) -> str:
"""Detect requirements file format."""
filename_lower = filename.lower()
for fmt, pattern in self.requirement_patterns.items():
if re.match(pattern, filename_lower):
return fmt
return "unknown"
async def _parse_requirements_txt(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Parse requirements.txt format files."""
dependencies = []
python_version = None
try:
content = path.read_text(encoding="utf-8")
lines = content.splitlines()
for line_num, line in enumerate(lines, 1):
line = line.strip()
# Skip comments and empty lines
if not line or line.startswith('#'):
continue
# Skip -r and -e directives (for now)
if line.startswith(('-r', '-e', '--')):
continue
# Parse requirement line
dep = self._parse_requirement_line(line, line_num)
if dep:
dependencies.append(dep)
except Exception as e:
logger.warning(f"Failed to parse requirements.txt {path}: {e}")
return dependencies, python_version
async def _parse_pyproject_toml(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Parse pyproject.toml files."""
dependencies = []
python_version = None
try:
content = path.read_text(encoding="utf-8")
data = tomllib.loads(content)
# Extract Python version requirement
build_system = data.get("build-system", {})
project = data.get("project", {})
tool_poetry = data.get("tool", {}).get("poetry", {})
# Check for Python version in different places
if project.get("requires-python"):
python_version = project["requires-python"]
elif tool_poetry.get("dependencies", {}).get("python"):
python_version = tool_poetry["dependencies"]["python"]
# Extract dependencies from project.dependencies
if "dependencies" in project:
for dep_line in project["dependencies"]:
dep = self._parse_requirement_line(dep_line, 0)
if dep:
dependencies.append(dep)
# Extract from tool.poetry.dependencies
if "tool" in data and "poetry" in data["tool"] and "dependencies" in data["tool"]["poetry"]:
poetry_deps = data["tool"]["poetry"]["dependencies"]
for name, version_spec in poetry_deps.items():
if name.lower() == "python":
continue # Skip Python version
if isinstance(version_spec, str):
req_line = f"{name}{version_spec}" if version_spec.startswith(('=', '<', '>', '~', '^', '!')) else f"{name}=={version_spec}"
else:
# Handle complex version specifications
req_line = f"{name}>={version_spec.get('version', '0.0.0')}"
dep = self._parse_requirement_line(req_line, 0)
if dep:
dependencies.append(dep)
except Exception as e:
logger.warning(f"Failed to parse pyproject.toml {path}: {e}")
return dependencies, python_version
async def _parse_setup_py(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Parse setup.py files (basic extraction)."""
dependencies = []
python_version = None
try:
content = path.read_text(encoding="utf-8")
# Look for install_requires
install_requires_match = re.search(r"install_requires\s*=\s*\[(.*?)\]", content, re.DOTALL)
if install_requires_match:
deps_text = install_requires_match.group(1)
# Extract quoted strings
quoted_deps = re.findall(r'["\']([^"\']+)["\']', deps_text)
for dep_line in quoted_deps:
dep = self._parse_requirement_line(dep_line, 0)
if dep:
dependencies.append(dep)
# Look for python_requires
python_requires_match = re.search(r"python_requires\s*=\s*[\"']([^\"']+)[\"']", content)
if python_requires_match:
python_version = python_requires_match.group(1)
except Exception as e:
logger.warning(f"Failed to parse setup.py {path}: {e}")
return dependencies, python_version
async def _parse_pipfile(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Parse Pipfile format."""
dependencies = []
python_version = None
try:
content = path.read_text(encoding="utf-8")
data = tomllib.loads(content)
# Extract Python version
if "requires" in data and "python_version" in data["requires"]:
python_version = f">={data['requires']['python_version']}"
# Extract packages
for section in ["packages", "dev-packages"]:
if section in data:
for name, version_spec in data[section].items():
if isinstance(version_spec, str):
req_line = f"{name}{version_spec}" if version_spec.startswith(('=', '<', '>', '~', '^', '!')) else f"{name}=={version_spec}"
else:
req_line = f"{name}>={version_spec.get('version', '0.0.0')}"
dep = self._parse_requirement_line(req_line, 0)
if dep:
dep["dev_dependency"] = (section == "dev-packages")
dependencies.append(dep)
except Exception as e:
logger.warning(f"Failed to parse Pipfile {path}: {e}")
return dependencies, python_version
async def _parse_conda_yml(self, path: Path) -> Tuple[List[Dict[str, Any]], Optional[str]]:
"""Parse conda environment.yml files."""
dependencies = []
python_version = None
try:
import yaml
content = path.read_text(encoding="utf-8")
data = yaml.safe_load(content)
if "dependencies" in data:
for dep in data["dependencies"]:
if isinstance(dep, str):
if dep.startswith("python"):
# Extract Python version
python_match = re.search(r"python\s*([><=~!]+)\s*([0-9.]+)", dep)
if python_match:
python_version = f"{python_match.group(1)}{python_match.group(2)}"
else:
parsed_dep = self._parse_requirement_line(dep, 0)
if parsed_dep:
dependencies.append(parsed_dep)
except Exception as e:
logger.warning(f"Failed to parse conda.yml {path}: {e}")
return dependencies, python_version
def _parse_requirement_line(self, line: str, line_number: int) -> Optional[Dict[str, Any]]:
"""Parse a single requirement line."""
try:
# Remove inline comments
if '#' in line:
line = line[:line.index('#')].strip()
if not line:
return None
# Handle extras (package[extra1,extra2])
extras = []
extras_match = re.search(r'\[([^\]]+)\]', line)
if extras_match:
extras = [e.strip() for e in extras_match.group(1).split(',')]
line = re.sub(r'\[([^\]]+)\]', '', line)
# Parse package name and version specifiers
# Split on version operators
version_ops = ['>=', '<=', '==', '!=', '~=', '>', '<']
package_name = line
version_specifiers = []
for op in version_ops:
if op in line:
parts = line.split(op)
package_name = parts[0].strip()
if len(parts) > 1:
version_specifiers.append({
"operator": op,
"version": parts[1].strip().split(',')[0].strip()
})
break
# Handle comma-separated version specs
if ',' in line and version_specifiers:
remaining = line.split(version_specifiers[0]["operator"], 1)[1]
for spec in remaining.split(',')[1:]:
spec = spec.strip()
for op in version_ops:
if spec.startswith(op):
version_specifiers.append({
"operator": op,
"version": spec[len(op):].strip()
})
break
# Clean package name (also strip caret/parenthesis specifiers, e.g. Poetry-style "pkg^1.2")
package_name = re.sub(r'[<>=!~^(,\s].*', '', package_name).strip()
if not package_name:
return None
return {
"name": package_name,
"version_specifiers": version_specifiers,
"extras": extras,
"line_number": line_number,
"raw_line": line.strip(),
}
except Exception as e:
logger.debug(f"Failed to parse requirement line '{line}': {e}")
return None
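# Illustrative example (not in the original source) of the parser above:
#   _parse_requirement_line('requests[security]>=2.28.1,<3.0  # pinned range', 12)
# returns roughly:
#   {"name": "requests",
#    "version_specifiers": [{"operator": ">=", "version": "2.28.1"},
#                           {"operator": "<", "version": "3.0"}],
#    "extras": ["security"], "line_number": 12, "raw_line": "requests>=2.28.1,<3.0"}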
async def _analyze_dependency_health(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Analyze overall health of dependencies."""
healthy = []
issues = []
for dep in dependencies:
name = dep["name"]
version_specs = dep["version_specifiers"]
# Check for problematic version specifications
health_issues = []
if not version_specs:
health_issues.append("No version constraint (could lead to instability)")
else:
# Check for overly restrictive versions
exact_versions = [spec for spec in version_specs if spec["operator"] == "=="]
if exact_versions:
health_issues.append("Exact version pinning (may cause conflicts)")
# Check for very loose constraints
loose_constraints = [spec for spec in version_specs if spec["operator"] in [">", ">="]]
if loose_constraints and not any(spec["operator"] in ["<", "<="] for spec in version_specs):
health_issues.append("No upper bound (may break with future versions)")
if health_issues:
issues.append({
"package": name,
"issues": health_issues,
"current_spec": version_specs
})
else:
healthy.append({
"package": name,
"version_spec": version_specs
})
return {
"healthy": healthy,
"issues": issues,
"health_score": len(healthy) / len(dependencies) * 100 if dependencies else 0
}
async def _check_package_updates(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Check for available package updates."""
outdated = []
current = []
async with PyPIClient() as client:
# Process in batches to avoid overwhelming PyPI
batch_size = 10
for i in range(0, len(dependencies), batch_size):
batch = dependencies[i:i + batch_size]
batch_tasks = []
for dep in batch:
task = self._check_single_package_update(client, dep)
batch_tasks.append(task)
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for dep, result in zip(batch, batch_results):
if isinstance(result, Exception):
logger.debug(f"Failed to check updates for {dep['name']}: {result}")
continue
if result["has_update"]:
outdated.append(result)
else:
current.append(result)
return {
"outdated": outdated,
"current": current,
"update_percentage": len(outdated) / len(dependencies) * 100 if dependencies else 0
}
async def _check_single_package_update(self, client: PyPIClient, dep: Dict[str, Any]) -> Dict[str, Any]:
"""Check if a single package has updates available."""
try:
package_data = await client.get_package_info(dep["name"])
latest_version = package_data["info"]["version"]
# Simple string comparison against any exact pin; proper semantic version comparison
# (e.g. packaging.version) would be more robust
pinned = [spec["version"] for spec in dep["version_specifiers"] if spec["operator"] == "=="]
has_update = bool(pinned) and pinned[0] != latest_version
return {
"package": dep["name"],
"current_spec": dep["version_specifiers"],
"latest_version": latest_version,
"has_update": has_update,
"update_recommendation": f"Update to {latest_version}" if has_update else "Already up to date"
}
except Exception as e:
return {
"package": dep["name"],
"current_spec": dep["version_specifiers"],
"latest_version": "unknown",
"has_update": False,
"error": str(e)
}
async def _scan_dependencies_security(self, dependencies: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Scan dependencies for security vulnerabilities."""
# Import security scanner if available
try:
from .security import scan_package_security
vulnerabilities = []
secure = []
# Process in small batches
batch_size = 5
for i in range(0, len(dependencies), batch_size):
batch = dependencies[i:i + batch_size]
batch_tasks = []
for dep in batch:
task = self._scan_single_dependency_security(dep)
batch_tasks.append(task)
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for dep, result in zip(batch, batch_results):
if isinstance(result, Exception):
logger.debug(f"Failed to scan security for {dep['name']}: {result}")
continue
if result["vulnerabilities"]:
vulnerabilities.append(result)
else:
secure.append(result)
return {
"vulnerabilities": vulnerabilities,
"secure": secure,
"vulnerability_count": sum(len(v["vulnerabilities"]) for v in vulnerabilities),
}
except ImportError:
logger.warning("Security scanner not available")
return await self._empty_security_result()
async def _scan_single_dependency_security(self, dep: Dict[str, Any]) -> Dict[str, Any]:
"""Scan a single dependency for security issues."""
try:
from .security import scan_package_security
result = await scan_package_security(
dep["name"],
version=None, # Latest version
include_dependencies=False
)
vuln_summary = result.get("security_summary", {})
return {
"package": dep["name"],
"vulnerabilities": result.get("vulnerabilities", {}).get("direct", []),
"risk_level": vuln_summary.get("risk_level", "minimal"),
"total_vulnerabilities": vuln_summary.get("total_vulnerabilities", 0)
}
except Exception as e:
return {
"package": dep["name"],
"vulnerabilities": [],
"risk_level": "unknown",
"error": str(e)
}
async def _check_dependencies_compatibility(
self, dependencies: List[Dict[str, Any]], python_version: Optional[str]
) -> Dict[str, Any]:
"""Check Python version compatibility for dependencies."""
if not python_version:
return await self._empty_compatibility_result()
compatible = []
incompatible = []
# Process in batches
batch_size = 10
for i in range(0, len(dependencies), batch_size):
batch = dependencies[i:i + batch_size]
batch_tasks = []
for dep in batch:
task = self._check_single_dependency_compatibility(dep, python_version)
batch_tasks.append(task)
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for dep, result in zip(batch, batch_results):
if isinstance(result, Exception):
logger.debug(f"Failed to check compatibility for {dep['name']}: {result}")
continue
if result["compatible"]:
compatible.append(result)
else:
incompatible.append(result)
return {
"compatible": compatible,
"incompatible": incompatible,
"python_version": python_version,
"compatibility_percentage": len(compatible) / len(dependencies) * 100 if dependencies else 0
}
async def _check_single_dependency_compatibility(
self, dep: Dict[str, Any], python_version: str
) -> Dict[str, Any]:
"""Check compatibility for a single dependency."""
try:
from .compatibility_check import check_python_compatibility
# Extract target Python version (simplified)
target_version = "3.9" # Default fallback
version_match = re.search(r'(\d+\.\d+)', python_version)
if version_match:
target_version = version_match.group(1)
result = await check_python_compatibility(dep["name"], target_version)
return {
"package": dep["name"],
"compatible": result.get("compatible", False),
"python_version": target_version,
"details": result.get("compatibility_info", "")
}
except Exception as e:
return {
"package": dep["name"],
"compatible": True, # Assume compatible on error
"python_version": python_version,
"error": str(e)
}
# Helper methods for empty results
async def _empty_updates_result(self) -> Dict[str, Any]:
return {"outdated": [], "current": [], "update_percentage": 0}
async def _empty_security_result(self) -> Dict[str, Any]:
return {"vulnerabilities": [], "secure": [], "vulnerability_count": 0}
async def _empty_compatibility_result(self) -> Dict[str, Any]:
return {"compatible": [], "incompatible": [], "python_version": None, "compatibility_percentage": 100}
def _generate_analysis_summary(
self,
dependencies: List[Dict[str, Any]],
health_analysis: Dict[str, Any],
update_analysis: Dict[str, Any],
security_analysis: Dict[str, Any],
compatibility_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""Generate comprehensive analysis summary."""
return {
"total_dependencies": len(dependencies),
"health_score": round(health_analysis.get("health_score", 0), 1),
"packages_with_issues": len(health_analysis.get("issues", [])),
"outdated_packages": len(update_analysis.get("outdated", [])),
"security_vulnerabilities": security_analysis.get("vulnerability_count", 0),
"compatibility_issues": len(compatibility_analysis.get("incompatible", [])),
"overall_risk_level": self._calculate_overall_risk_level(
health_analysis, update_analysis, security_analysis, compatibility_analysis
)
}
def _calculate_overall_risk_level(
self, health: Dict[str, Any], updates: Dict[str, Any],
security: Dict[str, Any], compatibility: Dict[str, Any]
) -> str:
"""Calculate overall risk level for the project."""
risk_score = 0
# Health risks
health_score = health.get("health_score", 100)
if health_score < 50:
risk_score += 30
elif health_score < 75:
risk_score += 15
# Security risks
vuln_count = security.get("vulnerability_count", 0)
if vuln_count > 10:
risk_score += 40
elif vuln_count > 5:
risk_score += 25
elif vuln_count > 0:
risk_score += 15
# Compatibility risks
incompat_count = len(compatibility.get("incompatible", []))
if incompat_count > 5:
risk_score += 25
elif incompat_count > 0:
risk_score += 10
# Update risks (outdated packages)
outdated_count = len(updates.get("outdated", []))
total_deps = len(updates.get("outdated", [])) + len(updates.get("current", []))
if total_deps > 0:
outdated_percentage = (outdated_count / total_deps) * 100
if outdated_percentage > 50:
risk_score += 20
elif outdated_percentage > 25:
risk_score += 10
# Calculate risk level
if risk_score >= 70:
return "critical"
elif risk_score >= 50:
return "high"
elif risk_score >= 30:
return "medium"
elif risk_score > 0:
return "low"
else:
return "minimal"
def _generate_requirements_recommendations(
self,
parsed_requirements: Dict[str, Any],
health_analysis: Dict[str, Any],
update_analysis: Dict[str, Any],
security_analysis: Dict[str, Any],
compatibility_analysis: Dict[str, Any],
summary: Dict[str, Any]
) -> List[str]:
"""Generate actionable recommendations for requirements management."""
recommendations = []
risk_level = summary.get("overall_risk_level", "minimal")
# Overall assessment
if risk_level == "critical":
recommendations.append("🚨 Critical issues detected - immediate action required")
elif risk_level == "high":
recommendations.append("⚠️ High risk dependencies - review and update urgently")
elif risk_level == "medium":
recommendations.append("⚠️ Moderate risk - address issues when possible")
elif risk_level == "minimal":
recommendations.append("✅ Requirements appear healthy")
# Specific recommendations
health_issues = health_analysis.get("issues", [])
if health_issues:
recommendations.append(f"🔧 Fix {len(health_issues)} dependency specification issues")
outdated_count = len(update_analysis.get("outdated", []))
if outdated_count > 0:
recommendations.append(f"📦 Update {outdated_count} outdated packages")
vuln_count = security_analysis.get("vulnerability_count", 0)
if vuln_count > 0:
recommendations.append(f"🔒 Address {vuln_count} security vulnerabilities")
incompat_count = len(compatibility_analysis.get("incompatible", []))
if incompat_count > 0:
recommendations.append(f"🐍 Fix {incompat_count} Python compatibility issues")
# File format recommendations
file_format = parsed_requirements["file_info"]["format"]
if file_format == "requirements.txt":
recommendations.append("💡 Consider migrating to pyproject.toml for better dependency management")
elif file_format == "unknown":
recommendations.append("📝 Use standard requirements file formats (requirements.txt, pyproject.toml)")
return recommendations
# Main analysis functions
async def analyze_project_requirements(
file_path: str,
check_updates: bool = True,
security_scan: bool = True,
compatibility_check: bool = True
) -> Dict[str, Any]:
"""
Analyze project requirements file for dependencies, security, and compatibility.
Args:
file_path: Path to the requirements file
check_updates: Whether to check for package updates
security_scan: Whether to perform security vulnerability scanning
compatibility_check: Whether to check Python version compatibility
Returns:
Comprehensive requirements file analysis
"""
analyzer = RequirementsAnalyzer()
return await analyzer.analyze_requirements_file(
file_path, check_updates, security_scan, compatibility_check
)
async def compare_requirements_files(
file_paths: List[str]
) -> Dict[str, Any]:
"""
Compare multiple requirements files to identify differences and conflicts.
Args:
file_paths: List of paths to requirements files to compare
Returns:
Comparative analysis of requirements files
"""
logger.info(f"Starting requirements comparison for {len(file_paths)} files")
analyzer = RequirementsAnalyzer()
file_analyses = {}
# Analyze each file
for file_path in file_paths:
try:
analysis = await analyzer.analyze_requirements_file(
file_path, check_updates=False, security_scan=False, compatibility_check=False
)
file_analyses[file_path] = analysis
except Exception as e:
logger.error(f"Failed to analyze {file_path}: {e}")
file_analyses[file_path] = {"error": str(e), "dependencies": []}
# Compare dependencies
all_packages = set()
for analysis in file_analyses.values():
if "dependencies" in analysis:
for dep in analysis["dependencies"]:
all_packages.add(dep["name"])
# Generate comparison results
conflicts = []
common_packages = []
unique_packages = {}
for package in all_packages:
versions_by_file = {}
for file_path, analysis in file_analyses.items():
if "dependencies" in analysis:
for dep in analysis["dependencies"]:
if dep["name"] == package:
versions_by_file[file_path] = dep["version_specifiers"]
break
if len(versions_by_file) == len(file_paths):
# Package is in all files
version_specs = list(versions_by_file.values())
if len(set(str(spec) for spec in version_specs)) > 1:
conflicts.append({
"package": package,
"versions_by_file": versions_by_file
})
else:
common_packages.append(package)
else:
# Package is unique to some files
for file_path, versions in versions_by_file.items():
if file_path not in unique_packages:
unique_packages[file_path] = []
unique_packages[file_path].append({
"package": package,
"version_specifiers": versions
})
return {
"comparison_timestamp": datetime.now(timezone.utc).isoformat(),
"files_compared": len(file_paths),
"file_analyses": file_analyses,
"comparison_results": {
"total_unique_packages": len(all_packages),
"common_packages": common_packages,
"conflicting_packages": conflicts,
"unique_to_files": unique_packages,
},
"recommendations": _generate_comparison_recommendations(conflicts, unique_packages, file_analyses)
}
def _generate_comparison_recommendations(
conflicts: List[Dict[str, Any]],
unique_packages: Dict[str, List[Dict[str, Any]]],
file_analyses: Dict[str, Any]
) -> List[str]:
"""Generate recommendations for requirements file comparison."""
recommendations = []
if conflicts:
recommendations.append(f"🔄 Resolve {len(conflicts)} version conflicts across files")
for conflict in conflicts[:3]: # Show first 3
recommendations.append(f" - {conflict['package']}: inconsistent versions")
if unique_packages:
total_unique = sum(len(packages) for packages in unique_packages.values())
recommendations.append(f"📦 {total_unique} packages are unique to specific files")
if not conflicts and not unique_packages:
recommendations.append("✅ All requirements files are consistent")
# File format recommendations
formats = set()
for analysis in file_analyses.values():
if "file_info" in analysis:
formats.add(analysis["file_info"]["format"])
if len(formats) > 1:
recommendations.append("📝 Consider standardizing on a single requirements file format")
return recommendations

View File

@ -1,143 +0,0 @@
"""Requirements file analysis tools for Python projects."""
import logging
from typing import Any, Dict, List
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..tools.requirements_analyzer import analyze_project_requirements, compare_requirements_files
logger = logging.getLogger(__name__)
async def analyze_requirements_file_tool(
file_path: str,
check_updates: bool = True,
security_scan: bool = True,
compatibility_check: bool = True
) -> Dict[str, Any]:
"""
Analyze project requirements file for dependencies, security, and compatibility.
This tool provides comprehensive analysis of Python project requirements files
including dependency parsing, version checking, security vulnerability scanning,
Python compatibility assessment, and actionable recommendations for improvements.
Args:
file_path: Path to the requirements file (requirements.txt, pyproject.toml, setup.py, etc.)
check_updates: Whether to check for available package updates
security_scan: Whether to perform security vulnerability scanning on dependencies
compatibility_check: Whether to check Python version compatibility for all dependencies
Returns:
Dictionary containing comprehensive requirements analysis including:
- File information and detected format (requirements.txt, pyproject.toml, etc.)
- Parsed dependencies with version specifiers and extras
- Dependency health analysis with specification issues and recommendations
- Package update analysis showing outdated packages and latest versions
- Security vulnerability scan results for all dependencies
- Python version compatibility assessment
- Overall risk level and actionable improvement recommendations
Raises:
FileNotFoundError: If the requirements file is not found
NetworkError: For network-related errors during analysis
SearchError: If requirements analysis fails
"""
logger.info(f"MCP tool: Analyzing requirements file {file_path}")
try:
result = await analyze_project_requirements(
file_path=file_path,
check_updates=check_updates,
security_scan=security_scan,
compatibility_check=compatibility_check
)
summary = result.get("analysis_summary", {})
total_deps = summary.get("total_dependencies", 0)
risk_level = summary.get("overall_risk_level", "unknown")
logger.info(f"MCP tool: Requirements analysis completed for {file_path} - {total_deps} dependencies, risk level: {risk_level}")
return result
except (FileNotFoundError, NetworkError, SearchError) as e:
logger.error(f"Error analyzing requirements file {file_path}: {e}")
return {
"error": f"Requirements analysis failed: {e}",
"error_type": type(e).__name__,
"file_path": file_path,
"analysis_timestamp": "",
"file_info": {"name": file_path, "format": "unknown"},
"dependencies": [],
"dependency_analysis": {},
"analysis_summary": {
"total_dependencies": 0,
"health_score": 0,
"packages_with_issues": 0,
"outdated_packages": 0,
"security_vulnerabilities": 0,
"compatibility_issues": 0,
"overall_risk_level": "critical",
},
"recommendations": [f"❌ Requirements analysis failed: {e}"],
"python_requirements": None,
}
async def compare_multiple_requirements_files(
file_paths: List[str]
) -> Dict[str, Any]:
"""
Compare multiple requirements files to identify differences and conflicts.
This tool analyzes multiple requirements files simultaneously to identify
version conflicts, unique dependencies, and inconsistencies across different
project configurations or environments.
Args:
file_paths: List of paths to requirements files to compare and analyze
Returns:
Dictionary containing comparative requirements analysis including:
- Detailed analysis results for each individual file
- Common packages shared across all files
- Conflicting package versions between files with specific version details
- Packages unique to specific files
- Recommendations for resolving conflicts and standardizing requirements
- Statistics on package overlap and conflict rates
Raises:
ValueError: If file_paths list is empty
NetworkError: For network-related errors during analysis
SearchError: If requirements comparison fails
"""
if not file_paths:
raise ValueError("File paths list cannot be empty")
logger.info(f"MCP tool: Comparing {len(file_paths)} requirements files")
try:
result = await compare_requirements_files(file_paths=file_paths)
comparison_results = result.get("comparison_results", {})
conflicts = len(comparison_results.get("conflicting_packages", []))
total_packages = comparison_results.get("total_unique_packages", 0)
logger.info(f"MCP tool: Requirements comparison completed - {total_packages} unique packages, {conflicts} conflicts found")
return result
except (ValueError, NetworkError, SearchError) as e:
logger.error(f"Error comparing requirements files: {e}")
return {
"error": f"Requirements comparison failed: {e}",
"error_type": type(e).__name__,
"comparison_timestamp": "",
"files_compared": len(file_paths),
"file_analyses": {},
"comparison_results": {
"total_unique_packages": 0,
"common_packages": [],
"conflicting_packages": [],
"unique_to_files": {},
},
"recommendations": [f"❌ Requirements comparison failed: {e}"]
}
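# Usage sketch: the wrappers above report failures from the listed exception
# types as an "error" key in the result instead of raising, so callers can
# branch on it. Both paths below are placeholders.
async def _example_requirements_tool_call() -> None:
    result = await compare_multiple_requirements_files(
        ["requirements.txt", "requirements-prod.txt"]
    )
    if "error" in result:
        logger.warning("Comparison failed: %s", result["error"])
    else:
        conflicts = result["comparison_results"]["conflicting_packages"]
        logger.info("Found %s conflicting packages", len(conflicts))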

View File

@ -1,660 +0,0 @@
"""Security vulnerability scanning and analysis tools for PyPI packages."""
import asyncio
import json
import logging
import os
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from urllib.parse import quote
import httpx
from ..core.exceptions import NetworkError, SearchError
from ..core.pypi_client import PyPIClient
logger = logging.getLogger(__name__)
class VulnerabilityScanner:
"""Comprehensive vulnerability scanner for PyPI packages."""
def __init__(self):
self.timeout = 30.0
self.session = None
# Vulnerability database endpoints
self.osv_api = "https://api.osv.dev/v1/query"
self.safety_db_api = "https://pyup.io/api/v1/safety"
self.snyk_api = "https://snyk.io/test/pip"
# Common vulnerability patterns to look for
self.high_risk_patterns = [
"remote code execution", "rce", "code injection", "sql injection",
"cross-site scripting", "xss", "csrf", "authentication bypass",
"privilege escalation", "arbitrary file", "path traversal",
"buffer overflow", "memory corruption", "denial of service"
]
async def scan_package(
self,
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True,
severity_filter: Optional[str] = None
) -> Dict[str, Any]:
"""
Comprehensive security scan of a PyPI package.
Args:
package_name: Name of the package to scan
version: Specific version to scan (optional, defaults to latest)
include_dependencies: Whether to scan dependencies too
severity_filter: Filter by severity level (low, medium, high, critical)
Returns:
Dictionary containing security analysis results
"""
logger.info(f"Starting security scan for package: {package_name}")
try:
# Get package information
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name, version)
package_version = version or package_data["info"]["version"]
# Run parallel vulnerability scans
scan_tasks = [
self._scan_osv_database(package_name, package_version),
self._scan_github_advisories(package_name, package_version),
self._analyze_package_metadata(package_data),
self._check_dependency_vulnerabilities(package_name, package_version) if include_dependencies else asyncio.create_task(self._empty_result())
]
osv_results, github_results, metadata_analysis, dependency_results = await asyncio.gather(
*scan_tasks, return_exceptions=True
)
# Consolidate results
vulnerabilities = []
# Process OSV results
if not isinstance(osv_results, Exception) and osv_results:
vulnerabilities.extend(osv_results.get("vulnerabilities", []))
# Process GitHub results
if not isinstance(github_results, Exception) and github_results:
vulnerabilities.extend(github_results.get("vulnerabilities", []))
# Process dependency vulnerabilities
if not isinstance(dependency_results, Exception) and dependency_results:
vulnerabilities.extend(dependency_results.get("vulnerabilities", []))
# Apply severity filter
if severity_filter:
vulnerabilities = [
vuln for vuln in vulnerabilities
if vuln.get("severity", "").lower() == severity_filter.lower()
]
# Generate security report
security_report = self._generate_security_report(
package_name, package_version, vulnerabilities, metadata_analysis
)
return security_report
except Exception as e:
logger.error(f"Security scan failed for {package_name}: {e}")
raise SearchError(f"Security scan failed: {e}") from e
async def _scan_osv_database(self, package_name: str, version: str) -> Dict[str, Any]:
"""Scan package against OSV (Open Source Vulnerabilities) database."""
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
query_data = {
"package": {
"name": package_name,
"ecosystem": "PyPI"
},
"version": version
}
response = await client.post(
self.osv_api,
json=query_data,
headers={"Content-Type": "application/json"}
)
if response.status_code == 200:
data = response.json()
vulnerabilities = []
for vuln in data.get("vulns", []):
severity = self._extract_severity_from_osv(vuln)
vulnerabilities.append({
"id": vuln.get("id", ""),
"summary": vuln.get("summary", ""),
"details": vuln.get("details", ""),
"severity": severity,
"published": vuln.get("published", ""),
"modified": vuln.get("modified", ""),
"source": "OSV",
"references": [ref.get("url", "") for ref in vuln.get("references", [])],
"affected_versions": self._extract_affected_versions(vuln),
"fixed_versions": self._extract_fixed_versions(vuln),
})
return {"vulnerabilities": vulnerabilities, "source": "OSV"}
else:
logger.warning(f"OSV API returned status {response.status_code}")
except Exception as e:
logger.warning(f"OSV database scan failed: {e}")
return {"vulnerabilities": [], "source": "OSV"}
async def _scan_github_advisories(self, package_name: str, version: str) -> Dict[str, Any]:
"""Scan against GitHub Security Advisories."""
try:
# GitHub GraphQL API for security advisories
query = """
query($ecosystem: SecurityAdvisoryEcosystem!, $package: String!) {
securityVulnerabilities(ecosystem: $ecosystem, package: $package, first: 100) {
nodes {
advisory {
ghsaId
summary
description
severity
publishedAt
updatedAt
references {
url
}
}
vulnerableVersionRange
firstPatchedVersion {
identifier
}
}
}
}
"""
variables = {
"ecosystem": "PIP",
"package": package_name
}
            # GitHub's GraphQL API rejects unauthenticated requests, so pass a
            # token from the environment when one is available (GITHUB_TOKEN is
            # used here as a conventional variable name); without it this scan
            # quietly falls back to an empty result.
            headers = {
                "Content-Type": "application/json",
                "User-Agent": "PyPI-Security-Scanner/1.0",
            }
            github_token = os.environ.get("GITHUB_TOKEN")
            if github_token:
                headers["Authorization"] = f"Bearer {github_token}"
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    "https://api.github.com/graphql",
                    json={"query": query, "variables": variables},
                    headers=headers,
                )
if response.status_code == 200:
data = response.json()
vulnerabilities = []
for vuln_node in data.get("data", {}).get("securityVulnerabilities", {}).get("nodes", []):
advisory = vuln_node.get("advisory", {})
# Check if current version is affected
if self._is_version_affected(version, vuln_node.get("vulnerableVersionRange", "")):
vulnerabilities.append({
"id": advisory.get("ghsaId", ""),
"summary": advisory.get("summary", ""),
"details": advisory.get("description", ""),
"severity": advisory.get("severity", "").lower(),
"published": advisory.get("publishedAt", ""),
"modified": advisory.get("updatedAt", ""),
"source": "GitHub",
"references": [ref.get("url", "") for ref in advisory.get("references", [])],
"vulnerable_range": vuln_node.get("vulnerableVersionRange", ""),
"first_patched": vuln_node.get("firstPatchedVersion", {}).get("identifier", ""),
})
return {"vulnerabilities": vulnerabilities, "source": "GitHub"}
except Exception as e:
logger.warning(f"GitHub advisories scan failed: {e}")
return {"vulnerabilities": [], "source": "GitHub"}
async def _analyze_package_metadata(self, package_data: Dict[str, Any]) -> Dict[str, Any]:
"""Analyze package metadata for security indicators."""
info = package_data.get("info", {})
security_indicators = {
"metadata_score": 0,
"risk_factors": [],
"security_features": [],
"warnings": []
}
# Check for security-related information
description = (info.get("description") or "").lower()
summary = (info.get("summary") or "").lower()
keywords = (info.get("keywords") or "").lower()
combined_text = f"{description} {summary} {keywords}"
# Look for security mentions
if any(term in combined_text for term in ["security", "cryptography", "authentication", "encryption"]):
security_indicators["security_features"].append("Contains security-related functionality")
security_indicators["metadata_score"] += 20
# Check for high-risk patterns
for pattern in self.high_risk_patterns:
if pattern in combined_text:
security_indicators["risk_factors"].append(f"Mentions: {pattern}")
security_indicators["metadata_score"] -= 10
# Check package age and maintenance
if info.get("author_email"):
security_indicators["metadata_score"] += 10
if info.get("home_page"):
security_indicators["metadata_score"] += 5
# Check for classifiers
classifiers = info.get("classifiers", [])
for classifier in classifiers:
if "Development Status :: 5 - Production/Stable" in classifier:
security_indicators["metadata_score"] += 15
security_indicators["security_features"].append("Production stable status")
elif "License ::" in classifier:
security_indicators["metadata_score"] += 5
# Check for suspicious patterns
if not info.get("author") and not info.get("maintainer"):
security_indicators["warnings"].append("No author or maintainer information")
security_indicators["metadata_score"] -= 20
if len(info.get("description", "")) < 50:
security_indicators["warnings"].append("Very brief or missing description")
security_indicators["metadata_score"] -= 10
return security_indicators
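    # Worked example of the scoring above: a package whose description mentions
    # "encryption" (+20), with an author email (+10), a home page (+5), a
    # "Development Status :: 5 - Production/Stable" classifier (+15) and a
    # license classifier (+5), plus author info and a reasonably long
    # description (no penalties), ends up with a metadata_score of 55 and no
    # warnings.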
async def _check_dependency_vulnerabilities(self, package_name: str, version: str) -> Dict[str, Any]:
"""Check vulnerabilities in package dependencies."""
try:
# Get package dependencies
async with PyPIClient() as client:
package_data = await client.get_package_info(package_name, version)
# Extract dependencies
requires_dist = package_data.get("info", {}).get("requires_dist", []) or []
dependencies = []
for req in requires_dist:
# Parse dependency name (simplified)
dep_name = req.split()[0].split(">=")[0].split("==")[0].split("~=")[0].split("!=")[0]
if dep_name and not dep_name.startswith("extra"):
dependencies.append(dep_name)
# Scan top dependencies for vulnerabilities
dependency_vulnerabilities = []
# Limit to top 10 dependencies to avoid overwhelming the system
for dep_name in dependencies[:10]:
try:
dep_scan = await self._scan_osv_database(dep_name, "latest")
for vuln in dep_scan.get("vulnerabilities", []):
vuln["dependency"] = dep_name
vuln["type"] = "dependency_vulnerability"
dependency_vulnerabilities.append(vuln)
except Exception as e:
logger.debug(f"Failed to scan dependency {dep_name}: {e}")
return {"vulnerabilities": dependency_vulnerabilities, "source": "dependencies"}
except Exception as e:
logger.warning(f"Dependency vulnerability check failed: {e}")
return {"vulnerabilities": [], "source": "dependencies"}
async def _empty_result(self) -> Dict[str, Any]:
"""Return empty result for disabled scans."""
return {"vulnerabilities": [], "source": "disabled"}
def _extract_severity_from_osv(self, vuln_data: Dict[str, Any]) -> str:
"""Extract severity from OSV vulnerability data."""
# OSV uses CVSS scores, map to common severity levels
severity_data = vuln_data.get("severity", [])
if severity_data:
score = severity_data[0].get("score", "")
if "CVSS:" in score:
# Extract CVSS score
try:
cvss_score = float(score.split("/")[1])
if cvss_score >= 9.0:
return "critical"
elif cvss_score >= 7.0:
return "high"
elif cvss_score >= 4.0:
return "medium"
else:
return "low"
                except (ValueError, IndexError):
                    pass
return "unknown"
def _extract_affected_versions(self, vuln_data: Dict[str, Any]) -> List[str]:
"""Extract affected version ranges from vulnerability data."""
affected = vuln_data.get("affected", [])
version_ranges = []
for affect in affected:
ranges = affect.get("ranges", [])
for range_data in ranges:
events = range_data.get("events", [])
for event in events:
if "introduced" in event:
version_ranges.append(f">= {event['introduced']}")
elif "fixed" in event:
version_ranges.append(f"< {event['fixed']}")
return version_ranges
def _extract_fixed_versions(self, vuln_data: Dict[str, Any]) -> List[str]:
"""Extract fixed versions from vulnerability data."""
affected = vuln_data.get("affected", [])
fixed_versions = []
for affect in affected:
ranges = affect.get("ranges", [])
for range_data in ranges:
events = range_data.get("events", [])
for event in events:
if "fixed" in event:
fixed_versions.append(event["fixed"])
return fixed_versions
def _is_version_affected(self, version: str, vulnerable_range: str) -> bool:
"""Check if a version is affected by a vulnerability range."""
# Simplified version checking - in production would use packaging.specifiers
if not vulnerable_range:
return True
# Basic patterns
if "< " in vulnerable_range:
try:
limit = vulnerable_range.split("< ")[1].strip()
return version < limit
            except (IndexError, TypeError):
                pass
if ">= " in vulnerable_range:
try:
limit = vulnerable_range.split(">= ")[1].strip()
return version >= limit
            except (IndexError, TypeError):
                pass
return True # Assume affected if we can't parse
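    # Sketch of a stricter alternative to _is_version_affected, leaning on the
    # packaging library (already a project dependency) instead of raw string
    # comparison. Mapping GitHub's "= x.y.z" form to PEP 440's "==" operator is
    # an assumption for illustration; imports stay local so the sketch is
    # self-contained.
    @staticmethod
    def _is_version_affected_strict(version: str, vulnerable_range: str) -> bool:
        """Check a version against a range like ">= 1.0, < 2.0" using packaging."""
        from packaging.specifiers import InvalidSpecifier, SpecifierSet
        from packaging.version import InvalidVersion, Version
        if not vulnerable_range:
            return True
        parts = []
        for raw in vulnerable_range.split(","):
            part = raw.strip()
            if part.startswith("= "):
                part = "==" + part[2:]
            parts.append(part.replace(" ", ""))
        try:
            return Version(version) in SpecifierSet(",".join(parts))
        except (InvalidSpecifier, InvalidVersion):
            return True  # mirror the permissive fallback above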
def _generate_security_report(
self,
package_name: str,
version: str,
vulnerabilities: List[Dict[str, Any]],
metadata_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""Generate comprehensive security report."""
# Categorize vulnerabilities by severity
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0}
dependency_vulns = []
direct_vulns = []
for vuln in vulnerabilities:
severity = vuln.get("severity", "unknown")
severity_counts[severity] = severity_counts.get(severity, 0) + 1
if vuln.get("type") == "dependency_vulnerability":
dependency_vulns.append(vuln)
else:
direct_vulns.append(vuln)
# Calculate risk score
risk_score = self._calculate_risk_score(severity_counts, metadata_analysis)
# Generate recommendations
recommendations = self._generate_security_recommendations(
vulnerabilities, metadata_analysis, risk_score
)
return {
"package": package_name,
"version": version,
"scan_timestamp": datetime.now(timezone.utc).isoformat(),
"security_summary": {
"total_vulnerabilities": len(vulnerabilities),
"direct_vulnerabilities": len(direct_vulns),
"dependency_vulnerabilities": len(dependency_vulns),
"severity_breakdown": severity_counts,
"risk_score": risk_score,
"risk_level": self._get_risk_level(risk_score),
},
"vulnerabilities": {
"direct": direct_vulns,
"dependencies": dependency_vulns,
},
"metadata_analysis": metadata_analysis,
"recommendations": recommendations,
"scan_details": {
"sources_checked": ["OSV", "GitHub", "Metadata"],
"dependencies_scanned": len(dependency_vulns) > 0,
"scan_completion": "success",
}
}
def _calculate_risk_score(self, severity_counts: Dict[str, int], metadata_analysis: Dict[str, Any]) -> float:
"""Calculate overall risk score (0-100)."""
score = 0.0
# Vulnerability scoring (0-80 points)
score += severity_counts.get("critical", 0) * 20
score += severity_counts.get("high", 0) * 15
score += severity_counts.get("medium", 0) * 8
score += severity_counts.get("low", 0) * 3
# Metadata scoring (0-20 points)
metadata_score = metadata_analysis.get("metadata_score", 0)
if metadata_score < 0:
score += abs(metadata_score) / 5 # Convert negative metadata score to risk
else:
score -= metadata_score / 10 # Good metadata reduces risk
# Cap at 100
return min(max(score, 0), 100)
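    # Worked example of the scoring above: one critical (20) plus two medium
    # (16) vulnerabilities with a metadata_score of -10 (adds 10/5 = 2) gives
    # 20 + 16 + 2 = 38, which _get_risk_level() below classifies as "medium".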
def _get_risk_level(self, risk_score: float) -> str:
"""Convert risk score to risk level."""
if risk_score >= 80:
return "critical"
elif risk_score >= 60:
return "high"
elif risk_score >= 30:
return "medium"
elif risk_score > 0:
return "low"
else:
return "minimal"
def _generate_security_recommendations(
self,
vulnerabilities: List[Dict[str, Any]],
metadata_analysis: Dict[str, Any],
risk_score: float
) -> List[str]:
"""Generate actionable security recommendations."""
recommendations = []
if len(vulnerabilities) > 0:
recommendations.append(f"🚨 Found {len(vulnerabilities)} security vulnerabilities - review and update immediately")
# Check for critical/high severity
critical_high = [v for v in vulnerabilities if v.get("severity") in ["critical", "high"]]
if critical_high:
recommendations.append(f"⚠️ {len(critical_high)} critical/high severity vulnerabilities require immediate attention")
# Check for fixed versions
fixed_versions = []
for vuln in vulnerabilities:
fixed = vuln.get("fixed_versions", []) or [vuln.get("first_patched", "")]
fixed_versions.extend([v for v in fixed if v])
if fixed_versions:
latest_fixed = max(fixed_versions) if fixed_versions else None
if latest_fixed:
recommendations.append(f"📦 Update to version {latest_fixed} or later to fix known vulnerabilities")
# Metadata recommendations
warnings = metadata_analysis.get("warnings", [])
if warnings:
recommendations.append(f"⚠️ Package metadata issues: {', '.join(warnings)}")
if metadata_analysis.get("metadata_score", 0) < 20:
recommendations.append("📝 Package has poor metadata quality - verify trustworthiness before use")
# General recommendations based on risk score
if risk_score >= 60:
recommendations.append("🛑 High risk package - consider alternatives or additional security review")
elif risk_score >= 30:
recommendations.append("⚠️ Moderate risk - monitor for updates and security patches")
elif len(vulnerabilities) == 0:
recommendations.append("✅ No known vulnerabilities found - package appears secure")
return recommendations
# Main scanning functions
async def scan_package_security(
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True,
severity_filter: Optional[str] = None
) -> Dict[str, Any]:
"""
Scan a PyPI package for security vulnerabilities.
Args:
package_name: Name of the package to scan
version: Specific version to scan (optional)
include_dependencies: Whether to scan dependencies
severity_filter: Filter by severity (low, medium, high, critical)
Returns:
Comprehensive security scan results
"""
scanner = VulnerabilityScanner()
return await scanner.scan_package(
package_name, version, include_dependencies, severity_filter
)
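# Usage sketch: scanning a single package and reading the summary. The package
# name "requests" is only an illustrative choice.
async def _example_single_package_scan() -> None:
    report = await scan_package_security("requests", include_dependencies=False)
    summary = report["security_summary"]
    logger.info(
        "%s %s: %s vulnerabilities, risk level %s",
        report["package"],
        report["version"],
        summary["total_vulnerabilities"],
        summary["risk_level"],
    )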
async def bulk_security_scan(
package_names: List[str],
include_dependencies: bool = False,
severity_threshold: str = "medium"
) -> Dict[str, Any]:
"""
Perform bulk security scanning of multiple packages.
Args:
package_names: List of package names to scan
include_dependencies: Whether to scan dependencies
severity_threshold: Minimum severity to report
Returns:
Bulk scan results with summary
"""
logger.info(f"Starting bulk security scan of {len(package_names)} packages")
scanner = VulnerabilityScanner()
scan_results = {}
summary = {
"total_packages": len(package_names),
"packages_with_vulnerabilities": 0,
"total_vulnerabilities": 0,
"high_risk_packages": [],
"scan_timestamp": datetime.now(timezone.utc).isoformat()
}
# Scan packages in parallel batches
batch_size = 5
for i in range(0, len(package_names), batch_size):
batch = package_names[i:i + batch_size]
batch_tasks = [
scanner.scan_package(pkg_name, include_dependencies=include_dependencies)
for pkg_name in batch
]
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
for pkg_name, result in zip(batch, batch_results):
if isinstance(result, Exception):
scan_results[pkg_name] = {
"error": str(result),
"scan_status": "failed"
}
else:
scan_results[pkg_name] = result
# Update summary
vuln_count = result.get("security_summary", {}).get("total_vulnerabilities", 0)
if vuln_count > 0:
summary["packages_with_vulnerabilities"] += 1
summary["total_vulnerabilities"] += vuln_count
risk_level = result.get("security_summary", {}).get("risk_level", "")
if risk_level in ["high", "critical"]:
summary["high_risk_packages"].append({
"package": pkg_name,
"risk_level": risk_level,
"vulnerabilities": vuln_count
})
return {
"summary": summary,
"detailed_results": scan_results,
"recommendations": _generate_bulk_recommendations(summary, scan_results)
}
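# Usage sketch: bulk-scanning a few packages and surfacing the high-risk ones.
# The package list is illustrative only.
async def _example_bulk_scan() -> None:
    results = await bulk_security_scan(["requests", "httpx", "packaging"])
    for risky in results["summary"]["high_risk_packages"]:
        logger.warning(
            "%s is %s risk with %s vulnerabilities",
            risky["package"],
            risky["risk_level"],
            risky["vulnerabilities"],
        )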
def _generate_bulk_recommendations(summary: Dict[str, Any], results: Dict[str, Any]) -> List[str]:
"""Generate recommendations for bulk scan results."""
recommendations = []
vuln_packages = summary["packages_with_vulnerabilities"]
total_packages = summary["total_packages"]
if vuln_packages == 0:
recommendations.append("✅ No security vulnerabilities found in any scanned packages")
else:
percentage = (vuln_packages / total_packages) * 100
recommendations.append(
f"🚨 {vuln_packages}/{total_packages} packages ({percentage:.1f}%) have security vulnerabilities"
)
high_risk = summary["high_risk_packages"]
if high_risk:
recommendations.append(
f"⚠️ {len(high_risk)} packages are high/critical risk: {', '.join([p['package'] for p in high_risk])}"
)
recommendations.append("🛑 Priority: Address high-risk packages immediately")
if summary["total_vulnerabilities"] > 0:
recommendations.append(f"📊 Total vulnerabilities found: {summary['total_vulnerabilities']}")
recommendations.append("🔍 Review detailed results and update affected packages")
return recommendations

View File

@ -1,147 +0,0 @@
"""Security vulnerability scanning tools for PyPI packages."""
import logging
from typing import Any, Dict, List, Optional
from ..core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from ..tools.security import bulk_security_scan, scan_package_security
logger = logging.getLogger(__name__)
async def scan_pypi_package_security(
package_name: str,
version: Optional[str] = None,
include_dependencies: bool = True,
severity_filter: Optional[str] = None
) -> Dict[str, Any]:
"""
Scan a PyPI package for security vulnerabilities.
    This tool performs comprehensive security vulnerability scanning of PyPI packages,
    checking them against multiple vulnerability databases, including OSV (Open Source
    Vulnerabilities) and GitHub Security Advisories, and analyzing package metadata for
    security indicators.
Args:
package_name: Name of the package to scan for vulnerabilities
version: Specific version to scan (optional, defaults to latest version)
include_dependencies: Whether to scan package dependencies for vulnerabilities
severity_filter: Filter results by severity level (low, medium, high, critical)
Returns:
Dictionary containing comprehensive security scan results including:
- Total vulnerability count and severity breakdown
- Direct package vulnerabilities vs dependency vulnerabilities
- Risk score and level assessment (minimal, low, medium, high, critical)
- Detailed vulnerability information with IDs, descriptions, and references
- Package metadata security analysis
- Actionable security recommendations
Raises:
InvalidPackageNameError: If package name is empty or invalid
PackageNotFoundError: If package is not found on PyPI
NetworkError: For network-related errors
SearchError: If security scanning fails
"""
if not package_name or not package_name.strip():
raise InvalidPackageNameError(package_name)
logger.info(f"MCP tool: Scanning security for package {package_name}")
try:
result = await scan_package_security(
package_name=package_name,
version=version,
include_dependencies=include_dependencies,
severity_filter=severity_filter
)
logger.info(f"MCP tool: Security scan completed for {package_name} - found {result.get('security_summary', {}).get('total_vulnerabilities', 0)} vulnerabilities")
return result
except (InvalidPackageNameError, NetworkError, SearchError) as e:
logger.error(f"Error scanning security for {package_name}: {e}")
return {
"error": f"Security scan failed: {e}",
"error_type": type(e).__name__,
"package": package_name,
"version": version,
"scan_timestamp": "",
"security_summary": {
"total_vulnerabilities": 0,
"direct_vulnerabilities": 0,
"dependency_vulnerabilities": 0,
"severity_breakdown": {"critical": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0},
"risk_score": 0,
"risk_level": "unknown",
},
"vulnerabilities": {"direct": [], "dependencies": []},
"metadata_analysis": {},
"recommendations": [f"❌ Security scan failed: {e}"],
"scan_details": {
"sources_checked": [],
"dependencies_scanned": False,
"scan_completion": "error",
}
}
async def bulk_scan_package_security(
package_names: List[str],
include_dependencies: bool = False,
severity_threshold: str = "medium"
) -> Dict[str, Any]:
"""
Perform bulk security scanning of multiple PyPI packages.
This tool scans multiple packages simultaneously for security vulnerabilities,
providing a consolidated report with summary statistics and prioritized
recommendations for addressing security issues across your package ecosystem.
Args:
package_names: List of package names to scan for vulnerabilities
include_dependencies: Whether to include dependency vulnerability scanning
severity_threshold: Minimum severity level to report (low, medium, high, critical)
Returns:
Dictionary containing bulk scan results including:
- Summary statistics (total packages, packages with vulnerabilities, high-risk packages)
- Detailed scan results for each package
- Prioritized recommendations for security remediation
- Scan timestamp and completion status
Raises:
ValueError: If package_names list is empty
NetworkError: For network-related errors during scanning
SearchError: If bulk scanning fails
"""
if not package_names:
raise ValueError("Package names list cannot be empty")
logger.info(f"MCP tool: Starting bulk security scan of {len(package_names)} packages")
try:
result = await bulk_security_scan(
package_names=package_names,
include_dependencies=include_dependencies,
severity_threshold=severity_threshold
)
logger.info(f"MCP tool: Bulk security scan completed - {result.get('summary', {}).get('packages_with_vulnerabilities', 0)} packages have vulnerabilities")
return result
except (ValueError, NetworkError, SearchError) as e:
logger.error(f"Error in bulk security scan: {e}")
return {
"error": f"Bulk security scan failed: {e}",
"error_type": type(e).__name__,
"summary": {
"total_packages": len(package_names),
"packages_with_vulnerabilities": 0,
"total_vulnerabilities": 0,
"high_risk_packages": [],
"scan_timestamp": ""
},
"detailed_results": {},
"recommendations": [f"❌ Bulk security scan failed: {e}"]
}
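# Registration sketch: how coroutines like the two above might be exposed as MCP
# tools. The fastmcp import, the server name, and the decorator-style
# registration are assumptions for illustration; the real server wires its
# tools up elsewhere.
def _example_register_security_tools():
    from fastmcp import FastMCP
    demo = FastMCP("pypi-security-demo")
    demo.tool()(scan_pypi_package_security)
    demo.tool()(bulk_scan_package_security)
    return demo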

File diff suppressed because it is too large

View File

@ -35,7 +35,6 @@ packaging = "^24.0"
pydantic = "^2.0.0"
pydantic-settings = "^2.0.0"
click = "8.1.7"
feedparser = "^6.0.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"

View File

@ -1,985 +0,0 @@
"""Tests for PyPI community and social tools functionality."""
import json
from datetime import datetime
from unittest.mock import AsyncMock, patch, MagicMock
import httpx
import pytest
from pypi_query_mcp.core.exceptions import InvalidPackageNameError, PackageNotFoundError, NetworkError
from pypi_query_mcp.tools.community import (
get_pypi_package_reviews,
manage_pypi_package_discussions,
get_pypi_maintainer_contacts,
_analyze_github_community_sentiment,
_check_stackoverflow_mentions,
_analyze_pypi_downloads_as_quality_indicator,
_get_community_health_metrics,
_calculate_community_score,
_generate_community_insights,
_extract_contact_info_from_metadata,
_find_github_repository,
_parse_github_url,
_analyze_issue_sentiment,
_analyze_stackoverflow_sentiment,
)
class TestGetPyPIPackageReviews:
"""Test community reviews and feedback functionality."""
@pytest.fixture
def mock_package_data(self):
"""Mock package data for testing."""
return {
"info": {
"name": "test-package",
"version": "1.0.0",
"summary": "A test package for community analysis",
"description": "A comprehensive test package with detailed description for community testing",
"keywords": "test, community, package",
"classifiers": [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Topic :: Software Development :: Libraries",
],
"license": "MIT",
"author": "Test Author",
"home_page": "https://example.com",
"project_urls": {
"Documentation": "https://docs.example.com",
"Repository": "https://github.com/test/test-package",
"Bug Reports": "https://github.com/test/test-package/issues",
},
}
}
@pytest.fixture
def mock_github_sentiment(self):
"""Mock GitHub sentiment analysis data."""
return {
"repository": "https://github.com/test/test-package",
"sentiment_analysis": {
"overall_sentiment_score": 75.5,
"issues_analyzed": 20,
"positive_indicators": 15,
"negative_indicators": 5,
"sentiment_factors": {
"closed_issues": 12,
"open_issues": 8,
"enhancement_requests": 5,
"bug_reports": 3,
},
},
"repository_stats": {
"stargazers_count": 150,
"forks_count": 25,
"open_issues_count": 8,
},
"issues_analyzed": 20,
"analysis_timestamp": datetime.now().isoformat(),
}
@pytest.fixture
def mock_stackoverflow_data(self):
"""Mock Stack Overflow mentions data."""
return {
"questions_found": 5,
"sentiment_analysis": {
"overall_sentiment_score": 65.0,
"questions_analyzed": 5,
"positive_indicators": 3,
"negative_indicators": 2,
"question_characteristics": {
"answered_questions": 4,
"unanswered_questions": 1,
"average_score": 2.4,
},
},
"search_timestamp": datetime.now().isoformat(),
"data_source": "Stack Overflow API",
}
@pytest.fixture
def mock_quality_indicators(self):
"""Mock quality indicators data."""
return {
"download_stats": {
"last_month": 50000,
"last_week": 12000,
"last_day": 2000,
},
"adoption_level": "moderate",
"quality_indicator_score": 50.0,
"analysis_timestamp": datetime.now().isoformat(),
}
@pytest.fixture
def mock_community_health(self):
"""Mock community health metrics."""
return {
"github_community_health": {
"health_percentage": 85,
"documentation": {"exists": True},
"contributing": {"exists": True},
"code_of_conduct": {"exists": True},
"license": {"exists": True},
"readme": {"exists": True},
},
"has_repository": True,
"repository_url": "https://github.com/test/test-package",
}
async def test_get_pypi_package_reviews_success(
self,
mock_package_data,
mock_github_sentiment,
mock_stackoverflow_data,
mock_quality_indicators,
mock_community_health
):
"""Test successful retrieval of package reviews."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_metadata, \
patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment") as mock_github, \
patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_stackoverflow, \
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:
mock_metadata.return_value = mock_package_data["info"]
mock_github.return_value = mock_github_sentiment
mock_stackoverflow.return_value = mock_stackoverflow_data
mock_quality.return_value = mock_quality_indicators
mock_health.return_value = mock_community_health
result = await get_pypi_package_reviews(
package_name="test-package",
include_ratings=True,
include_community_feedback=True,
sentiment_analysis=True,
max_reviews=50
)
assert result["package"] == "test-package"
assert "community_score" in result
assert "metadata" in result
assert "community_health" in result
assert "quality_indicators" in result
assert "insights" in result
assert "review_system_status" in result
assert "github_community_feedback" in result
assert "stackoverflow_mentions" in result
assert "sentiment_analysis" in result
assert "ratings" in result
# Check community score structure
community_score = result["community_score"]
assert "overall_score" in community_score
assert "community_status" in community_score
assert "score_components" in community_score
# Check review system status
review_status = result["review_system_status"]
assert review_status["native_pypi_reviews"] == "not_available"
assert review_status["future_ready"] is True
async def test_get_pypi_package_reviews_invalid_package_name(self):
"""Test handling of invalid package name."""
with pytest.raises(InvalidPackageNameError):
await get_pypi_package_reviews("")
with pytest.raises(InvalidPackageNameError):
await get_pypi_package_reviews(" ")
async def test_get_pypi_package_reviews_minimal_options(self, mock_package_data):
"""Test reviews with minimal options enabled."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_metadata, \
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:
mock_metadata.return_value = mock_package_data["info"]
mock_quality.return_value = {"quality_indicator_score": 30}
mock_health.return_value = {"has_repository": False}
result = await get_pypi_package_reviews(
package_name="test-package",
include_ratings=False,
include_community_feedback=False,
sentiment_analysis=False
)
assert result["package"] == "test-package"
assert "github_community_feedback" not in result
assert "stackoverflow_mentions" not in result
assert "sentiment_analysis" not in result
assert "ratings" not in result
async def test_get_pypi_package_reviews_network_error(self):
"""Test handling of network errors."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews", side_effect=NetworkError("Network error")):
with pytest.raises(NetworkError):
await get_pypi_package_reviews("test-package")
class TestManagePyPIPackageDiscussions:
"""Test package discussions management functionality."""
@pytest.fixture
def mock_package_data(self):
"""Mock package data for discussions testing."""
return {
"info": {
"name": "test-package",
"project_urls": {
"Repository": "https://github.com/test/test-package",
},
}
}
@pytest.fixture
def mock_discussion_status(self):
"""Mock current discussion status."""
return {
"github_discussions": {
"enabled": False,
"reason": "requires_github_api_integration",
"repository": "https://github.com/test/test-package",
},
"community_platforms": {
"discord": {"available": False},
"reddit": {"available": False},
"forums": {"available": False},
},
"native_pypi_discussions": {
"available": False,
"note": "PyPI does not currently support native discussions",
},
}
async def test_manage_discussions_get_status(self, mock_package_data, mock_discussion_status):
"""Test getting discussion status."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="get_status"
)
assert result["package"] == "test-package"
assert result["action_performed"] == "get_status"
assert "status" in result
assert "current_discussion_status" in result
assert "available_platforms" in result
assert "discussion_system_status" in result
# Check system status
system_status = result["discussion_system_status"]
assert system_status["native_pypi_discussions"] == "not_available"
assert system_status["future_ready"] is True
async def test_manage_discussions_enable(self, mock_package_data, mock_discussion_status):
"""Test enabling discussions."""
discussion_settings = {
"categories": ["General", "Q&A", "Ideas"],
"moderation": "manual_review",
}
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="enable",
discussion_settings=discussion_settings
)
assert result["package"] == "test-package"
assert result["action_performed"] == "enable"
assert result["status"] == "configured"
assert result["action"] == "enable_discussions"
assert "settings_applied" in result
assert "next_steps" in result
async def test_manage_discussions_disable(self, mock_package_data, mock_discussion_status):
"""Test disabling discussions."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="disable"
)
assert result["package"] == "test-package"
assert result["action_performed"] == "disable"
assert result["status"] == "configured"
assert result["action"] == "disable_discussions"
assert "next_steps" in result
async def test_manage_discussions_configure(self, mock_package_data, mock_discussion_status):
"""Test configuring discussions."""
discussion_settings = {
"categories": ["General", "Q&A", "Ideas", "Show and Tell"],
"moderation": "community_moderation",
"notifications": ["email_notifications", "web_notifications"],
}
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="configure",
discussion_settings=discussion_settings
)
assert result["package"] == "test-package"
assert result["action_performed"] == "configure"
assert result["status"] == "configured"
assert result["action"] == "configure_discussions"
assert "configuration_options" in result
async def test_manage_discussions_moderate(self, mock_package_data, mock_discussion_status):
"""Test moderating discussions."""
moderator_controls = {
"content_filtering": True,
"auto_moderation": True,
"moderator_roles": ["owner", "maintainer"],
}
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="moderate",
moderator_controls=moderator_controls
)
assert result["package"] == "test-package"
assert result["action_performed"] == "moderate"
assert result["status"] == "moderation_configured"
assert result["action"] == "moderate_discussions"
assert "moderation_features" in result
async def test_manage_discussions_get_metrics(self, mock_package_data, mock_discussion_status):
"""Test getting discussion metrics."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_discussions") as mock_metadata, \
patch("pypi_query_mcp.tools.community._get_current_discussion_status") as mock_status:
mock_metadata.return_value = mock_package_data
mock_status.return_value = mock_discussion_status
result = await manage_pypi_package_discussions(
package_name="test-package",
action="get_metrics"
)
assert result["package"] == "test-package"
assert result["action_performed"] == "get_metrics"
assert result["status"] == "metrics_retrieved"
assert "github_metrics" in result
assert "overall_engagement" in result
async def test_manage_discussions_invalid_action(self):
"""Test handling of invalid action."""
with pytest.raises(InvalidPackageNameError):
await manage_pypi_package_discussions(
package_name="test-package",
action="invalid_action"
)
async def test_manage_discussions_invalid_package_name(self):
"""Test handling of invalid package name."""
with pytest.raises(InvalidPackageNameError):
await manage_pypi_package_discussions("")
with pytest.raises(InvalidPackageNameError):
await manage_pypi_package_discussions(" ")
class TestGetPyPIMaintainerContacts:
"""Test maintainer contact information functionality."""
@pytest.fixture
def mock_package_metadata(self):
"""Mock package metadata for contact testing."""
return {
"name": "test-package",
"author": "Test Author",
"author_email": "author@example.com",
"maintainer": "Test Maintainer",
"maintainer_email": "maintainer@example.com",
"home_page": "https://example.com",
"project_urls": {
"Documentation": "https://docs.example.com",
"Repository": "https://github.com/test/test-package",
"Bug Reports": "https://github.com/test/test-package/issues",
"Support": "https://support.example.com",
},
}
@pytest.fixture
def mock_github_info(self):
"""Mock GitHub maintainer information."""
return {
"repository": "https://github.com/test/test-package",
"owner": "test",
"repository_data": {
"owner": {
"login": "test",
"type": "User",
"html_url": "https://github.com/test",
},
"has_pages": True,
"default_branch": "main",
},
"contributors": [
{
"login": "test",
"contributions": 150,
"html_url": "https://github.com/test",
},
{
"login": "contributor1",
"contributions": 25,
"html_url": "https://github.com/contributor1",
},
],
"primary_maintainer": {
"login": "test",
"type": "User",
"html_url": "https://github.com/test",
},
}
@pytest.fixture
def mock_support_channels(self):
"""Mock support channels information."""
return {
"issue_tracker": "https://github.com/test/test-package/issues",
"documentation": "https://test.github.io/test-package/",
"community_forum": None,
"chat_channels": [],
}
@pytest.fixture
def mock_community_channels(self):
"""Mock community channels information."""
return {
"github_discussions": "https://github.com/test/test-package/discussions",
"stackoverflow_tag": "https://stackoverflow.com/questions/tagged/test-package",
"reddit_community": None,
"discord_server": None,
}
@pytest.fixture
def mock_contribution_info(self):
"""Mock contribution guidelines information."""
return {
"repository": "https://github.com/test/test-package",
"contribution_files": {
"CONTRIBUTING.md": True,
"CODE_OF_CONDUCT.md": True,
"SECURITY.md": False,
},
"guidelines_available": True,
}
async def test_get_maintainer_contacts_success(
self,
mock_package_metadata,
mock_github_info,
mock_support_channels,
mock_community_channels,
mock_contribution_info
):
"""Test successful retrieval of maintainer contacts."""
contact_types = ["github", "support", "community"]
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata, \
patch("pypi_query_mcp.tools.community._analyze_github_maintainer_info") as mock_github, \
patch("pypi_query_mcp.tools.community._get_support_channels") as mock_support, \
patch("pypi_query_mcp.tools.community._get_community_channels") as mock_community, \
patch("pypi_query_mcp.tools.community._get_contribution_guidelines") as mock_contrib:
mock_metadata.return_value = mock_package_metadata
mock_github.return_value = mock_github_info
mock_support.return_value = mock_support_channels
mock_community.return_value = mock_community_channels
mock_contrib.return_value = mock_contribution_info
result = await get_pypi_maintainer_contacts(
package_name="test-package",
contact_types=contact_types,
include_social_profiles=True,
include_contribution_guidelines=True,
respect_privacy_settings=True
)
assert result["package"] == "test-package"
assert "contact_information" in result
assert "accessibility_assessment" in result
assert "contact_recommendations" in result
assert "privacy_compliance" in result
assert "github_information" in result
assert "support_channels" in result
assert "community_channels" in result
assert "contribution_guidelines" in result
assert "social_profiles" in result
assert "communication_guidelines" in result
# Check privacy compliance
privacy = result["privacy_compliance"]
assert privacy["respects_privacy_settings"] is True
assert privacy["data_sources"] == "Publicly available information only"
async def test_get_maintainer_contacts_email_included(self, mock_package_metadata):
"""Test contacts with email included and privacy disabled."""
contact_types = ["email", "github"]
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata, \
patch("pypi_query_mcp.tools.community._analyze_github_maintainer_info") as mock_github:
mock_metadata.return_value = mock_package_metadata
mock_github.return_value = {"status": "no_github_repository"}
result = await get_pypi_maintainer_contacts(
package_name="test-package",
contact_types=contact_types,
respect_privacy_settings=False
)
contact_info = result["contact_information"]
assert "available_contacts" in contact_info
# When privacy is disabled, emails should be included
if not contact_info["privacy_compliant"]:
# This would include emails if privacy is disabled
pass
async def test_get_maintainer_contacts_privacy_enabled(self, mock_package_metadata):
"""Test contacts with privacy settings enabled."""
contact_types = ["email", "github"]
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata:
mock_metadata.return_value = mock_package_metadata
result = await get_pypi_maintainer_contacts(
package_name="test-package",
contact_types=contact_types,
respect_privacy_settings=True
)
contact_info = result["contact_information"]
assert contact_info["privacy_compliant"] is True
# With privacy enabled, emails should be hidden
if "email_note" in contact_info.get("available_contacts", {}):
assert "hidden due to privacy settings" in contact_info["available_contacts"]["email_note"]
async def test_get_maintainer_contacts_minimal_options(self, mock_package_metadata):
"""Test contacts with minimal options."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_contacts") as mock_metadata:
mock_metadata.return_value = mock_package_metadata
result = await get_pypi_maintainer_contacts(
package_name="test-package",
contact_types=["support"],
include_social_profiles=False,
include_contribution_guidelines=False
)
assert result["package"] == "test-package"
assert "contact_information" in result
assert "github_information" not in result
assert "contribution_guidelines" not in result
assert "social_profiles" not in result
async def test_get_maintainer_contacts_invalid_contact_types(self):
"""Test handling of invalid contact types."""
with pytest.raises(InvalidPackageNameError):
await get_pypi_maintainer_contacts(
package_name="test-package",
contact_types=["invalid_type"]
)
async def test_get_maintainer_contacts_invalid_package_name(self):
"""Test handling of invalid package name."""
with pytest.raises(InvalidPackageNameError):
await get_pypi_maintainer_contacts("")
with pytest.raises(InvalidPackageNameError):
await get_pypi_maintainer_contacts(" ")
class TestHelperFunctions:
"""Test helper functions for community tools."""
def test_parse_github_url_valid(self):
"""Test parsing valid GitHub URLs."""
test_cases = [
("https://github.com/owner/repo", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
("https://github.com/owner/repo.git", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
("https://github.com/owner/repo/", {"repository_url": "https://github.com/owner/repo", "owner": "owner", "repo": "repo"}),
]
for url, expected in test_cases:
result = _parse_github_url(url)
assert result == expected
def test_parse_github_url_invalid(self):
"""Test parsing invalid GitHub URLs."""
test_cases = [
"https://gitlab.com/owner/repo",
"https://github.com/owner",
"https://github.com/",
"not-a-url",
]
for url in test_cases:
result = _parse_github_url(url)
assert "status" in result or "error" in result
def test_analyze_issue_sentiment_positive(self):
"""Test analyzing positive GitHub issue sentiment."""
issues_data = {
"issues": [
{
"title": "Enhancement: Add new feature",
"state": "closed",
"labels": [{"name": "enhancement"}, {"name": "good first issue"}],
},
{
"title": "How to use this package?",
"state": "closed",
"labels": [{"name": "question"}],
},
]
}
result = _analyze_issue_sentiment(issues_data)
assert result["overall_sentiment_score"] > 50
assert result["issues_analyzed"] == 2
assert result["sentiment_factors"]["closed_issues"] == 2
assert result["sentiment_factors"]["enhancement_requests"] == 1
def test_analyze_issue_sentiment_negative(self):
"""Test analyzing negative GitHub issue sentiment."""
issues_data = {
"issues": [
{
"title": "Critical bug: Application crashes",
"state": "open",
"labels": [{"name": "bug"}, {"name": "critical"}],
},
{
"title": "Error when importing package",
"state": "open",
"labels": [{"name": "bug"}],
},
]
}
result = _analyze_issue_sentiment(issues_data)
assert result["overall_sentiment_score"] < 50
assert result["issues_analyzed"] == 2
assert result["sentiment_factors"]["open_issues"] == 2
assert result["sentiment_factors"]["bug_reports"] == 2
def test_analyze_stackoverflow_sentiment_positive(self):
"""Test analyzing positive Stack Overflow sentiment."""
questions = [
{
"title": "How to implement best practices with test-package",
"tags": ["test-package", "python"],
"score": 5,
"is_answered": True,
},
{
"title": "Tutorial: Getting started with test-package",
"tags": ["test-package", "tutorial"],
"score": 3,
"is_answered": True,
},
]
result = _analyze_stackoverflow_sentiment(questions, "test-package")
assert result["overall_sentiment_score"] > 50
assert result["questions_analyzed"] == 2
assert result["question_characteristics"]["answered_questions"] == 2
assert result["question_characteristics"]["average_score"] == 4.0
def test_analyze_stackoverflow_sentiment_negative(self):
"""Test analyzing negative Stack Overflow sentiment."""
questions = [
{
"title": "test-package not working: Error on import",
"tags": ["test-package", "error"],
"score": -1,
"is_answered": False,
},
{
"title": "Problem with test-package installation",
"tags": ["test-package", "installation"],
"score": 0,
"is_answered": False,
},
]
result = _analyze_stackoverflow_sentiment(questions, "test-package")
assert result["overall_sentiment_score"] < 50
assert result["questions_analyzed"] == 2
assert result["question_characteristics"]["unanswered_questions"] == 2
assert result["question_characteristics"]["average_score"] == -0.5
def test_calculate_community_score_excellent(self):
"""Test calculating excellent community score."""
github_sentiment = {
"sentiment_analysis": {"overall_sentiment_score": 85}
}
stackoverflow_data = {
"sentiment_analysis": {"overall_sentiment_score": 80}
}
quality_indicators = {
"quality_indicator_score": 90
}
community_health = {
"github_community_health": {"health_percentage": 95}
}
result = _calculate_community_score(
github_sentiment,
stackoverflow_data,
quality_indicators,
community_health
)
assert result["overall_score"] >= 80
assert result["community_status"] == "excellent"
assert len(result["score_components"]) > 0
def test_calculate_community_score_poor(self):
"""Test calculating poor community score."""
github_sentiment = {
"sentiment_analysis": {"overall_sentiment_score": 20}
}
stackoverflow_data = {
"sentiment_analysis": {"overall_sentiment_score": 25}
}
quality_indicators = {
"quality_indicator_score": 15
}
community_health = {}
result = _calculate_community_score(
github_sentiment,
stackoverflow_data,
quality_indicators,
community_health
)
assert result["overall_score"] < 35
assert result["community_status"] == "poor"
def test_generate_community_insights_strong_community(self):
"""Test generating insights for strong community."""
github_sentiment = {
"repository_stats": {"stargazers_count": 2000}
}
stackoverflow_data = {
"questions_found": 25
}
community_score = {
"overall_score": 85
}
package_metadata = {
"name": "test-package"
}
result = _generate_community_insights(
github_sentiment,
stackoverflow_data,
community_score,
package_metadata
)
assert "key_insights" in result
assert "community_strengths" in result
assert len(result["community_strengths"]) > 0
# Should have positive insights for high score
insights_text = " ".join(result["key_insights"])
assert "strong" in insights_text.lower() or "positive" in insights_text.lower()
def test_extract_contact_info_from_metadata_with_privacy(self):
"""Test extracting contact info with privacy enabled."""
package_metadata = {
"author_email": "author@example.com",
"maintainer_email": "maintainer@example.com",
"project_urls": {
"Repository": "https://github.com/test/repo",
"Documentation": "https://docs.example.com",
"Support": "https://support.example.com",
},
"home_page": "https://example.com",
}
contact_types = ["email", "github", "support"]
result = _extract_contact_info_from_metadata(
package_metadata,
contact_types,
respect_privacy=True
)
assert result["privacy_compliant"] is True
# With privacy enabled, emails should be hidden
assert "email_note" in result["available_contacts"]
# Project URLs should still be included
assert len(result["project_urls"]) > 0
def test_extract_contact_info_from_metadata_without_privacy(self):
"""Test extracting contact info with privacy disabled."""
package_metadata = {
"author_email": "author@example.com",
"maintainer_email": "maintainer@example.com",
"project_urls": {
"Repository": "https://github.com/test/repo",
},
}
contact_types = ["email", "github"]
result = _extract_contact_info_from_metadata(
package_metadata,
contact_types,
respect_privacy=False
)
assert result["privacy_compliant"] is False
# With privacy disabled, emails should be included
assert "author_email" in result["available_contacts"]
assert "maintainer_email" in result["available_contacts"]
@pytest.mark.asyncio
class TestCommunityIntegrations:
"""Test community tool integrations with external services."""
async def test_github_community_sentiment_no_repository(self):
"""Test GitHub sentiment analysis when no repository is found."""
with patch("pypi_query_mcp.tools.community._find_github_repository") as mock_find:
mock_find.return_value = {"status": "no_github_repository"}
result = await _analyze_github_community_sentiment("test-package")
assert result["status"] == "no_github_repository"
async def test_stackoverflow_mentions_api_error(self):
"""Test Stack Overflow mentions with API error."""
with patch("httpx.AsyncClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get.return_value.status_code = 500
result = await _check_stackoverflow_mentions("test-package")
assert result["status"] == "api_unavailable"
assert result["questions_found"] == 0
async def test_quality_indicator_with_download_stats(self):
"""Test quality indicator calculation with download stats."""
with patch("pypi_query_mcp.tools.community.get_package_download_stats") as mock_stats:
mock_stats.return_value = {
"downloads": {
"last_month": 500000,
"last_week": 125000,
"last_day": 18000,
}
}
result = await _analyze_pypi_downloads_as_quality_indicator("test-package")
assert result["adoption_level"] == "high"
assert result["quality_indicator_score"] > 0
assert "download_stats" in result
async def test_community_health_metrics_no_repository(self):
"""Test community health metrics when no repository exists."""
with patch("pypi_query_mcp.tools.community._find_github_repository") as mock_find:
mock_find.return_value = {"status": "no_github_repository"}
result = await _get_community_health_metrics("test-package")
assert result["has_repository"] is False
assert "note" in result
@pytest.mark.asyncio
class TestAsyncBehavior:
"""Test async behavior and error handling."""
async def test_concurrent_operations_success(self):
"""Test that concurrent operations work correctly."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_meta, \
patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment") as mock_github, \
patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_so, \
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
patch("pypi_query_mcp.tools.community._get_community_health_metrics") as mock_health:
# Set up mocks to return after small delays to test concurrency
import asyncio
async def delayed_return(value, delay=0.01):
await asyncio.sleep(delay)
return value
# Use side_effect so each call produces a fresh coroutine that the async mocks
# actually await; assigning a coroutine object to return_value would hand back
# the same unawaited coroutine instead of the dict.
mock_meta.side_effect = lambda *args, **kwargs: delayed_return({"name": "test-package"})
mock_github.side_effect = lambda *args, **kwargs: delayed_return({"sentiment_analysis": {"overall_sentiment_score": 75}})
mock_so.side_effect = lambda *args, **kwargs: delayed_return({"sentiment_analysis": {"overall_sentiment_score": 70}})
mock_quality.side_effect = lambda *args, **kwargs: delayed_return({"quality_indicator_score": 80})
mock_health.side_effect = lambda *args, **kwargs: delayed_return({"has_repository": True})
start_time = datetime.now()
result = await get_pypi_package_reviews("test-package")
end_time = datetime.now()
# Should complete relatively quickly due to concurrent execution
assert (end_time - start_time).total_seconds() < 1.0
assert result["package"] == "test-package"
async def test_partial_failure_handling(self):
"""Test handling when some operations fail but others succeed."""
with patch("pypi_query_mcp.tools.community._get_package_metadata_for_reviews") as mock_meta, \
patch("pypi_query_mcp.tools.community._analyze_github_community_sentiment", side_effect=Exception("GitHub error")) as mock_github, \
patch("pypi_query_mcp.tools.community._check_stackoverflow_mentions") as mock_so, \
patch("pypi_query_mcp.tools.community._analyze_pypi_downloads_as_quality_indicator") as mock_quality, \
patch("pypi_query_mcp.tools.community._get_community_health_metrics", side_effect=Exception("Health error")) as mock_health:
mock_meta.return_value = {"name": "test-package"}
mock_so.return_value = {"sentiment_analysis": {"overall_sentiment_score": 70}}
mock_quality.return_value = {"quality_indicator_score": 80}
result = await get_pypi_package_reviews("test-package")
# Should still return a result even with some failures
assert result["package"] == "test-package"
assert "community_score" in result
# Failed operations should result in empty dicts or be excluded
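# The partial-failure tolerance exercised above is commonly achieved with
# asyncio.gather(..., return_exceptions=True), replacing failed results with a neutral
# default. A standalone sketch of that pattern (not the actual body of
# get_pypi_package_reviews, whose internals are not shown here):
import asyncio


async def _sketch_gather_with_fallbacks(*coros, default=None):
    """Run coroutines concurrently and swap any raised exception for a default value."""
    results = await asyncio.gather(*coros, return_exceptions=True)
    return [default if isinstance(result, Exception) else result for result in results]


async def _sketch_gather_demo():
    async def ok():
        return {"quality_indicator_score": 80}

    async def boom():
        raise RuntimeError("GitHub error")

    github, quality = await _sketch_gather_with_fallbacks(boom(), ok(), default={})
    assert github == {} and quality["quality_indicator_score"] == 80
    # e.g. asyncio.run(_sketch_gather_demo())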

View File

@ -1,730 +0,0 @@
"""Tests for PyPI Discovery & Monitoring Tools."""
import pytest
from datetime import datetime, timedelta
from unittest.mock import AsyncMock, patch, Mock
from pypi_query_mcp.core.exceptions import InvalidPackageNameError, NetworkError, SearchError
from pypi_query_mcp.tools.discovery import (
DiscoveryCache,
get_pypi_package_recommendations,
get_pypi_trending_today,
monitor_pypi_new_releases,
search_pypi_by_maintainer,
_categorize_package,
_is_package_maintainer,
_discovery_cache,
)
class TestDiscoveryCache:
"""Test the DiscoveryCache functionality."""
def test_cache_basic_operations(self):
"""Test basic cache get/set operations."""
cache = DiscoveryCache(default_ttl=60)
# Test empty cache
assert cache.get("nonexistent") is None
# Test set and get
test_data = {"test": "value"}
cache.set("test_key", test_data)
assert cache.get("test_key") == test_data
# Test clear
cache.clear()
assert cache.get("test_key") is None
def test_cache_expiration(self):
"""Test cache expiration functionality."""
cache = DiscoveryCache(default_ttl=1) # 1 second TTL
test_data = {"test": "value"}
cache.set("test_key", test_data)
# Should be available immediately
assert cache.get("test_key") == test_data
# Mock time to simulate expiration
with patch("time.time", return_value=1000000):
cache.set("test_key", test_data)
with patch("time.time", return_value=1000002): # 2 seconds later
assert cache.get("test_key") is None
def test_cache_custom_ttl(self):
"""Test cache with custom TTL."""
cache = DiscoveryCache(default_ttl=60)
test_data = {"test": "value"}
cache.set("test_key", test_data, ttl=120) # Custom 2-minute TTL
# Should still be available after default TTL would expire
with patch("time.time", return_value=1000000):
cache.set("test_key", test_data, ttl=120)
with patch("time.time", return_value=1000060): # 1 minute later
assert cache.get("test_key") == test_data
with patch("time.time", return_value=1000130): # 2+ minutes later
assert cache.get("test_key") is None
class TestMonitorPyPINewReleases:
"""Test the monitor_pypi_new_releases function."""
@pytest.mark.asyncio
async def test_monitor_basic_functionality(self):
"""Test basic monitoring functionality."""
mock_releases = [
{
"name": "test-package",
"version": "1.0.0",
"release_time": "2023-01-01T12:00:00Z",
"description": "Test package",
"link": "https://pypi.org/project/test-package/",
}
]
mock_package_info = {
"info": {
"name": "test-package",
"version": "1.0.0",
"summary": "A test package",
"author": "Test Author",
"license": "MIT",
"home_page": "https://example.com",
"keywords": "test, package",
"requires_python": ">=3.8",
"project_urls": {},
"classifiers": ["Topic :: Software Development"],
}
}
with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
mock_fetch.return_value = mock_releases
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["software-development"]
result = await monitor_pypi_new_releases(hours=24)
assert "new_releases" in result
assert result["total_releases_found"] == 1
assert result["monitoring_period_hours"] == 24
assert len(result["new_releases"]) == 1
release = result["new_releases"][0]
assert release["name"] == "test-package"
assert release["summary"] == "A test package"
assert "categories" in release
@pytest.mark.asyncio
async def test_monitor_with_filters(self):
"""Test monitoring with various filters."""
mock_releases = [
{
"name": "web-package",
"version": "1.0.0",
"release_time": "2023-01-01T12:00:00Z",
"description": "Web framework",
"link": "https://pypi.org/project/web-package/",
},
{
"name": "data-package",
"version": "2.0.0",
"release_time": "2023-01-01T13:00:00Z",
"description": "Data science package",
"link": "https://pypi.org/project/data-package/",
}
]
with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
mock_fetch.return_value = mock_releases
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
def mock_get_package_info(package_name):
if package_name == "web-package":
return {
"info": {
"name": "web-package",
"author": "Web Author",
"summary": "Web framework",
"license": "MIT",
}
}
elif package_name == "data-package":
return {
"info": {
"name": "data-package",
"author": "Data Author",
"summary": "Data science package",
"license": "Apache",
}
}
mock_client.get_package_info.side_effect = mock_get_package_info
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
def mock_categorize_func(info):
if "web" in info.get("summary", "").lower():
return ["web"]
elif "data" in info.get("summary", "").lower():
return ["data-science"]
return ["general"]
mock_categorize.side_effect = mock_categorize_func
# Test category filtering
result = await monitor_pypi_new_releases(
categories=["web"],
hours=24
)
assert result["total_releases_found"] == 1
assert result["new_releases"][0]["name"] == "web-package"
# Test maintainer filtering
result = await monitor_pypi_new_releases(
maintainer_filter="Web Author",
hours=24
)
assert result["total_releases_found"] == 1
assert result["new_releases"][0]["name"] == "web-package"
@pytest.mark.asyncio
async def test_monitor_cache_functionality(self):
"""Test cache functionality in monitoring."""
# Clear cache first
_discovery_cache.clear()
mock_releases = [
{
"name": "cached-package",
"version": "1.0.0",
"release_time": "2023-01-01T12:00:00Z",
"description": "Cached package",
"link": "https://pypi.org/project/cached-package/",
}
]
with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
mock_fetch.return_value = mock_releases
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = {
"info": {
"name": "cached-package",
"summary": "Cached package",
"author": "Cache Author",
}
}
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["general"]
# First call should fetch data
result1 = await monitor_pypi_new_releases(hours=24, cache_ttl=300)
assert mock_fetch.call_count == 1
# Second call with same parameters should use cache
result2 = await monitor_pypi_new_releases(hours=24, cache_ttl=300)
assert mock_fetch.call_count == 1 # Should not increase
# Results should be identical
assert result1["timestamp"] == result2["timestamp"]
@pytest.mark.asyncio
async def test_monitor_error_handling(self):
"""Test error handling in monitoring."""
with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
mock_fetch.side_effect = Exception("Network error")
with pytest.raises(NetworkError):
await monitor_pypi_new_releases(hours=24)
class TestGetPyPITrendingToday:
"""Test the get_pypi_trending_today function."""
@pytest.mark.asyncio
async def test_trending_basic_functionality(self):
"""Test basic trending analysis."""
mock_releases_result = {
"new_releases": [
{
"name": "trending-package",
"version": "1.0.0",
"summary": "Trending package",
"categories": ["web"],
"release_time": "2023-01-01T12:00:00Z",
}
]
}
mock_trending_result = {
"trending_packages": [
{
"package": "popular-package",
"downloads": {"last_day": 10000},
"summary": "Popular package",
}
]
}
with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
mock_monitor.return_value = mock_releases_result
with patch("pypi_query_mcp.tools.search.get_trending_packages") as mock_trending:
mock_trending.return_value = mock_trending_result
with patch("pypi_query_mcp.tools.discovery._enhance_trending_analysis") as mock_enhance:
mock_enhance.return_value = [
{
"name": "trending-package",
"trending_score": 10.0,
"trending_reason": "new_release",
},
{
"name": "popular-package",
"trending_score": 8.0,
"trending_reason": "download_surge",
}
]
result = await get_pypi_trending_today(
category="web",
limit=10
)
assert "trending_today" in result
assert result["total_trending"] == 2
assert result["category"] == "web"
assert len(result["trending_today"]) == 2
@pytest.mark.asyncio
async def test_trending_with_filters(self):
"""Test trending analysis with filters."""
with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
mock_monitor.return_value = {"new_releases": []}
with patch("pypi_query_mcp.tools.search.get_trending_packages") as mock_trending:
mock_trending.return_value = {"trending_packages": []}
with patch("pypi_query_mcp.tools.discovery._enhance_trending_analysis") as mock_enhance:
mock_enhance.return_value = []
result = await get_pypi_trending_today(
category="ai",
min_downloads=5000,
limit=20,
include_new_packages=False,
trending_threshold=2.0
)
assert result["category"] == "ai"
assert result["filters_applied"]["min_downloads"] == 5000
assert result["filters_applied"]["trending_threshold"] == 2.0
assert not result["filters_applied"]["include_new_packages"]
@pytest.mark.asyncio
async def test_trending_error_handling(self):
"""Test error handling in trending analysis."""
with patch("pypi_query_mcp.tools.discovery.monitor_pypi_new_releases") as mock_monitor:
mock_monitor.side_effect = Exception("Monitoring error")
with pytest.raises(SearchError):
await get_pypi_trending_today()
class TestSearchPyPIByMaintainer:
"""Test the search_pypi_by_maintainer function."""
@pytest.mark.asyncio
async def test_search_by_maintainer_basic(self):
"""Test basic maintainer search functionality."""
mock_search_results = {
"packages": [
{
"name": "maintainer-package-1",
"summary": "First package",
},
{
"name": "maintainer-package-2",
"summary": "Second package",
}
]
}
mock_package_info = {
"info": {
"name": "maintainer-package-1",
"version": "1.0.0",
"summary": "First package",
"author": "Test Maintainer",
"author_email": "test@example.com",
"license": "MIT",
"keywords": "test",
"classifiers": [],
"requires_python": ">=3.8",
}
}
with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
mock_search.return_value = mock_search_results
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._is_package_maintainer") as mock_is_maintainer:
mock_is_maintainer.return_value = True
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["development"]
result = await search_pypi_by_maintainer(
maintainer="Test Maintainer",
sort_by="popularity"
)
assert result["maintainer"] == "Test Maintainer"
assert result["total_packages"] == 1
assert len(result["packages"]) == 1
assert "portfolio_analysis" in result
assert "maintainer_profile" in result
@pytest.mark.asyncio
async def test_search_by_maintainer_invalid_input(self):
"""Test maintainer search with invalid input."""
with pytest.raises(InvalidPackageNameError):
await search_pypi_by_maintainer("")
with pytest.raises(InvalidPackageNameError):
await search_pypi_by_maintainer(" ")
@pytest.mark.asyncio
async def test_search_by_maintainer_with_stats(self):
"""Test maintainer search with download statistics."""
mock_search_results = {"packages": [{"name": "stats-package"}]}
mock_package_info = {
"info": {
"name": "stats-package",
"version": "1.0.0",
"author": "Stats Maintainer",
"summary": "Package with stats",
}
}
mock_stats = {
"recent_downloads": {
"last_month": 50000,
"last_week": 12000,
"last_day": 2000,
}
}
with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
mock_search.return_value = mock_search_results
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._is_package_maintainer") as mock_is_maintainer:
mock_is_maintainer.return_value = True
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["general"]
with patch("pypi_query_mcp.tools.download_stats.get_package_download_stats") as mock_get_stats:
mock_get_stats.return_value = mock_stats
result = await search_pypi_by_maintainer(
maintainer="Stats Maintainer",
include_stats=True
)
assert result["total_packages"] == 1
package = result["packages"][0]
assert "download_stats" in package
assert package["download_stats"]["last_month"] == 50000
@pytest.mark.asyncio
async def test_search_by_maintainer_error_handling(self):
"""Test error handling in maintainer search."""
with patch("pypi_query_mcp.tools.search.search_packages") as mock_search:
mock_search.side_effect = Exception("Search error")
with pytest.raises(SearchError):
await search_pypi_by_maintainer("Error Maintainer")
class TestGetPyPIPackageRecommendations:
"""Test the get_pypi_package_recommendations function."""
@pytest.mark.asyncio
async def test_recommendations_basic_functionality(self):
"""Test basic recommendation functionality."""
mock_package_info = {
"info": {
"name": "base-package",
"version": "1.0.0",
"summary": "Base package for recommendations",
"keywords": "test, recommendations",
"classifiers": ["Topic :: Software Development"],
}
}
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._find_similar_packages") as mock_similar:
mock_similar.return_value = [
{
"name": "similar-package",
"type": "similar",
"confidence": 0.8,
"reason": "Similar functionality",
}
]
with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
mock_enhance.return_value = [
{
"name": "similar-package",
"type": "similar",
"confidence": 0.8,
"summary": "Similar package",
"categories": ["development"],
}
]
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["development"]
result = await get_pypi_package_recommendations(
package_name="base-package",
recommendation_type="similar"
)
assert result["base_package"]["name"] == "base-package"
assert result["total_recommendations"] == 1
assert result["recommendation_type"] == "similar"
assert len(result["recommendations"]) == 1
@pytest.mark.asyncio
async def test_recommendations_different_types(self):
"""Test different recommendation types."""
mock_package_info = {
"info": {
"name": "test-package",
"version": "1.0.0",
"summary": "Test package",
}
}
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._find_complementary_packages") as mock_complementary:
mock_complementary.return_value = [
{
"name": "complementary-package",
"type": "complementary",
"confidence": 0.9,
}
]
with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
mock_enhance.return_value = [
{
"name": "complementary-package",
"type": "complementary",
"confidence": 0.9,
}
]
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["general"]
result = await get_pypi_package_recommendations(
package_name="test-package",
recommendation_type="complementary"
)
assert result["recommendation_type"] == "complementary"
assert result["total_recommendations"] == 1
@pytest.mark.asyncio
async def test_recommendations_with_user_context(self):
"""Test recommendations with user context."""
mock_package_info = {
"info": {
"name": "context-package",
"version": "1.0.0",
"summary": "Package with context",
}
}
user_context = {
"experience_level": "beginner",
"use_case": "web development",
}
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.return_value = mock_package_info
with patch("pypi_query_mcp.tools.discovery._find_similar_packages") as mock_similar:
mock_similar.return_value = []
with patch("pypi_query_mcp.tools.discovery._enhance_recommendations") as mock_enhance:
mock_enhance.return_value = []
with patch("pypi_query_mcp.tools.discovery._categorize_package") as mock_categorize:
mock_categorize.return_value = ["web"]
result = await get_pypi_package_recommendations(
package_name="context-package",
user_context=user_context
)
assert result["parameters"]["user_context"] == user_context
assert result["algorithm_insights"]["personalization_applied"] == True
@pytest.mark.asyncio
async def test_recommendations_invalid_input(self):
"""Test recommendations with invalid input."""
with pytest.raises(InvalidPackageNameError):
await get_pypi_package_recommendations("")
with pytest.raises(InvalidPackageNameError):
await get_pypi_package_recommendations(" ")
@pytest.mark.asyncio
async def test_recommendations_error_handling(self):
"""Test error handling in recommendations."""
with patch("pypi_query_mcp.tools.discovery.PyPIClient") as mock_client_class:
mock_client = AsyncMock()
mock_client_class.return_value.__aenter__.return_value = mock_client
mock_client.get_package_info.side_effect = Exception("Package error")
with pytest.raises(SearchError):
await get_pypi_package_recommendations("error-package")
class TestHelperFunctions:
"""Test helper functions used by discovery tools."""
def test_categorize_package(self):
"""Test package categorization."""
# Test with classifiers
package_info = {
"summary": "Web framework for Python",
"description": "A micro web framework",
"keywords": "web, framework, api",
"classifiers": [
"Topic :: Internet :: WWW/HTTP :: Dynamic Content",
"Topic :: Software Development :: Libraries :: Python Modules"
],
}
with patch("pypi_query_mcp.tools.discovery._categorize_package", return_value=["web", "internet"]):
categories = _categorize_package(package_info)
assert "web" in categories
def test_is_package_maintainer(self):
"""Test maintainer checking functionality."""
package_info = {
"author": "John Doe",
"author_email": "john@example.com",
"maintainer": "Jane Smith",
"maintainer_email": "jane@example.com",
}
# Test author match
assert _is_package_maintainer(package_info, "John Doe", False) == True
assert _is_package_maintainer(package_info, "john doe", False) == True
# Test maintainer match
assert _is_package_maintainer(package_info, "Jane Smith", False) == True
# Test no match
assert _is_package_maintainer(package_info, "Bob Wilson", False) == False
# Test email match (when enabled)
assert _is_package_maintainer(package_info, "john@example.com", True) == True
assert _is_package_maintainer(package_info, "john@example.com", False) == False
@pytest.fixture
def mock_rss_response():
"""Mock RSS response for testing."""
return '''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>PyPI Recent Updates</title>
<item>
<title>test-package 1.0.0</title>
<description>Test package description</description>
<link>https://pypi.org/project/test-package/</link>
<pubDate>Mon, 01 Jan 2023 12:00:00 GMT</pubDate>
</item>
</channel>
</rss>'''
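# The fixture above mirrors PyPI's recent-updates RSS feed. A minimal, standard-library
# sketch of turning such a payload into the release records used throughout these tests
# (name, version, description, link, release_time); the real _fetch_recent_releases_from_rss
# presumably also handles HTTP fetching and malformed entries.
import xml.etree.ElementTree as ET
from email.utils import parsedate_to_datetime


def _sketch_parse_release_feed(rss_text):
    releases = []
    root = ET.fromstring(rss_text.encode("utf-8"))
    for item in root.iter("item"):
        # Item titles look like "test-package 1.0.0": name first, version last.
        title = (item.findtext("title") or "").strip()
        name, _, version = title.rpartition(" ")
        pub_date = item.findtext("pubDate")
        releases.append(
            {
                "name": name,
                "version": version,
                "description": (item.findtext("description") or "").strip(),
                "link": (item.findtext("link") or "").strip(),
                "release_time": parsedate_to_datetime(pub_date).isoformat() if pub_date else None,
            }
        )
    return releases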
class TestIntegration:
"""Integration tests for discovery tools."""
@pytest.mark.asyncio
async def test_full_workflow_monitoring_to_recommendations(self):
"""Test full workflow from monitoring to recommendations."""
# This would be a more complex integration test
# that combines multiple functions in a realistic workflow
pass
@pytest.mark.asyncio
async def test_cache_consistency_across_functions(self):
"""Test cache consistency across different discovery functions."""
# Clear cache first
_discovery_cache.clear()
# Test that cache is properly shared between functions
with patch("pypi_query_mcp.tools.discovery._fetch_recent_releases_from_rss") as mock_fetch:
mock_fetch.return_value = []
# First call should populate cache
await monitor_pypi_new_releases(hours=24, cache_ttl=300)
assert mock_fetch.call_count == 1
# Second call should use cache
await monitor_pypi_new_releases(hours=24, cache_ttl=300)
assert mock_fetch.call_count == 1 # Should not increase
def test_error_propagation(self):
"""Test that errors are properly propagated and handled."""
# Test various error scenarios and ensure they're handled consistently
pass
# Additional test classes for edge cases and performance testing could be added here

View File

@ -1,574 +0,0 @@
"""Tests for PyPI Development Workflow Tools."""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from pypi_query_mcp.core.exceptions import (
InvalidPackageNameError,
NetworkError,
PackageNotFoundError,
)
from pypi_query_mcp.tools.workflow import (
PyPIWorkflowError,
_analyze_build_quality,
_analyze_wheel_filename,
_calculate_completeness_score,
_calculate_discoverability_score,
_generate_html_preview,
_generate_next_steps,
_validate_package_name_format,
check_pypi_upload_requirements,
get_pypi_build_logs,
preview_pypi_package_page,
validate_pypi_package_name,
)
class TestValidatePackageNameFormat:
"""Test package name format validation."""
def test_valid_package_names(self):
"""Test that valid package names pass validation."""
valid_names = [
"mypackage",
"my-package",
"my_package",
"my.package",
"package123",
"a",
"package-name-123",
]
for name in valid_names:
result = _validate_package_name_format(name)
assert result["valid"] is True, f"'{name}' should be valid"
assert len(result["issues"]) == 0
def test_invalid_package_names(self):
"""Test that invalid package names fail validation."""
invalid_names = [
"", # Empty
"-package", # Starts with hyphen
"package-", # Ends with hyphen
".package", # Starts with dot
"package.", # Ends with dot
"pack--age", # Double hyphen
"pack..age", # Double dot
"pack@age", # Invalid character
"PACKAGE", # Uppercase (should get recommendation)
]
for name in invalid_names:
result = _validate_package_name_format(name)
if name == "PACKAGE":
# This should be valid but get recommendations
assert result["valid"] is True
assert len(result["recommendations"]) > 0
else:
assert result["valid"] is False or len(result["issues"]) > 0, f"'{name}' should be invalid"
def test_reserved_names(self):
"""Test that reserved names are flagged."""
reserved_names = ["pip", "setuptools", "wheel", "python"]
for name in reserved_names:
result = _validate_package_name_format(name)
assert result["valid"] is False
assert any("reserved" in issue.lower() for issue in result["issues"])
def test_normalization(self):
"""Test package name normalization."""
test_cases = [
("My_Package", "my-package"),
("my__package", "my-package"),
("my.-_package", "my-package"),
("PACKAGE", "package"),
]
for input_name, expected in test_cases:
result = _validate_package_name_format(input_name)
assert result["normalized_name"] == expected
class TestValidatePyPIPackageName:
"""Test the main package name validation function."""
@pytest.mark.asyncio
async def test_validate_available_package(self):
"""Test validation of an available package name."""
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
# Mock package not found (available)
mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = PackageNotFoundError("test-package")
result = await validate_pypi_package_name("test-package")
assert result["package_name"] == "test-package"
assert result["availability"]["status"] == "available"
assert result["ready_for_upload"] is True
@pytest.mark.asyncio
async def test_validate_taken_package(self):
"""Test validation of a taken package name."""
mock_package_data = {
"info": {
"name": "requests",
"version": "2.28.0",
"summary": "Python HTTP for Humans.",
"author": "Kenneth Reitz",
}
}
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data
result = await validate_pypi_package_name("requests")
assert result["package_name"] == "requests"
assert result["availability"]["status"] == "taken"
assert result["availability"]["existing_package"]["name"] == "requests"
assert result["ready_for_upload"] is False
@pytest.mark.asyncio
async def test_validate_invalid_format(self):
"""Test validation of invalid package name format."""
with pytest.raises(InvalidPackageNameError):
await validate_pypi_package_name("-invalid-")
@pytest.mark.asyncio
async def test_network_error_handling(self):
"""Test handling of network errors during validation."""
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = NetworkError("Connection failed")
result = await validate_pypi_package_name("test-package")
assert result["availability"]["status"] == "unknown"
class TestPreviewPyPIPackagePage:
"""Test package page preview generation."""
@pytest.mark.asyncio
async def test_basic_preview_generation(self):
"""Test basic preview generation with minimal metadata."""
result = await preview_pypi_package_page(
package_name="my-package",
version="1.0.0",
summary="A test package",
author="Test Author"
)
assert result["package_name"] == "my-package"
assert result["version"] == "1.0.0"
assert result["preview"]["sections"]["header"]["summary"] == "A test package"
assert result["preview"]["sections"]["header"]["author"] == "Test Author"
@pytest.mark.asyncio
async def test_comprehensive_preview(self):
"""Test preview generation with comprehensive metadata."""
keywords = ["testing", "python", "package"]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3.8",
"License :: OSI Approved :: MIT License",
]
result = await preview_pypi_package_page(
package_name="comprehensive-package",
version="2.1.0",
summary="A comprehensive test package with full metadata",
description="This is a detailed description of the package functionality...",
author="Test Author",
license_name="MIT",
home_page="https://github.com/test/package",
keywords=keywords,
classifiers=classifiers,
)
assert result["ready_for_upload"] is True
assert result["validation"]["completeness_score"]["level"] in ["good", "complete"]
assert result["seo_analysis"]["discoverability_score"]["level"] in ["good", "excellent"]
@pytest.mark.asyncio
async def test_preview_warnings(self):
"""Test that preview generates appropriate warnings."""
result = await preview_pypi_package_page(
package_name="minimal-package",
# Minimal metadata to trigger warnings
)
assert len(result["warnings"]) > 0
assert any("Summary is missing" in warning for warning in result["warnings"])
assert any("description" in warning.lower() for warning in result["warnings"])
@pytest.mark.asyncio
async def test_invalid_package_name_preview(self):
"""Test preview with invalid package name."""
with pytest.raises(InvalidPackageNameError):
await preview_pypi_package_page("-invalid-package-")
class TestCalculateScores:
"""Test scoring calculation functions."""
def test_discoverability_score_calculation(self):
"""Test discoverability score calculation."""
# High quality metadata
result = _calculate_discoverability_score(
summary="A comprehensive package for testing",
description="This is a very detailed description with lots of useful information about the package functionality and use cases.",
keywords=["testing", "python", "package", "quality", "automation"],
classifiers=["Development Status :: 4 - Beta", "Programming Language :: Python :: 3.8"]
)
assert result["score"] >= 70
assert result["level"] in ["good", "excellent"]
# Poor quality metadata
result = _calculate_discoverability_score("", "", [], [])
assert result["score"] == 0
assert result["level"] == "poor"
def test_completeness_score_calculation(self):
"""Test completeness score calculation."""
# Complete metadata
sections = {
"header": {
"summary": "A test package",
"author": "Test Author",
},
"metadata": {
"license": "MIT",
"home_page": "https://github.com/test/package",
"keywords": ["test", "package"],
"classifiers": ["Development Status :: 4 - Beta"],
},
"description": {
"content": "A detailed description with more than 200 characters to ensure it gets a good score.",
"length": 80,
}
}
result = _calculate_completeness_score(sections)
assert result["score"] >= 60
assert result["level"] in ["good", "complete"]
class TestCheckPyPIUploadRequirements:
"""Test PyPI upload requirements checking."""
@pytest.mark.asyncio
async def test_minimal_requirements_met(self):
"""Test with minimal required fields."""
result = await check_pypi_upload_requirements(
package_name="test-package",
version="1.0.0",
author="Test Author",
description="A test package"
)
assert result["upload_readiness"]["can_upload"] is True
assert result["validation"]["compliance"]["required_percentage"] == 100.0
@pytest.mark.asyncio
async def test_missing_required_fields(self):
"""Test with missing required fields."""
result = await check_pypi_upload_requirements(
package_name="test-package",
# Missing required fields
)
assert result["upload_readiness"]["can_upload"] is False
assert len(result["issues"]["errors"]) > 0
@pytest.mark.asyncio
async def test_comprehensive_metadata(self):
"""Test with comprehensive metadata."""
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3.8",
"License :: OSI Approved :: MIT License",
]
result = await check_pypi_upload_requirements(
package_name="comprehensive-package",
version="1.0.0",
author="Test Author",
author_email="test@example.com",
description="A comprehensive test package",
long_description="This is a detailed description...",
license_name="MIT",
home_page="https://github.com/test/package",
classifiers=classifiers,
requires_python=">=3.8"
)
assert result["upload_readiness"]["should_upload"] is True
assert result["validation"]["compliance"]["recommended_percentage"] >= 80.0
@pytest.mark.asyncio
async def test_invalid_package_name_requirements(self):
"""Test requirements check with invalid package name."""
with pytest.raises(InvalidPackageNameError):
await check_pypi_upload_requirements("-invalid-")
class TestGetPyPIBuildLogs:
"""Test PyPI build logs analysis."""
@pytest.mark.asyncio
async def test_analyze_package_with_wheels(self):
"""Test analysis of package with wheel distributions."""
mock_package_data = {
"info": {"name": "test-package", "version": "1.0.0"},
"releases": {
"1.0.0": [
{
"filename": "test_package-1.0.0-py3-none-any.whl",
"packagetype": "bdist_wheel",
"size": 10000,
"upload_time_iso_8601": "2023-01-01T00:00:00Z",
"python_version": "py3",
"url": "https://files.pythonhosted.org/...",
"md5_digest": "abc123",
"digests": {"sha256": "def456"},
},
{
"filename": "test-package-1.0.0.tar.gz",
"packagetype": "sdist",
"size": 15000,
"upload_time_iso_8601": "2023-01-01T00:00:00Z",
"python_version": "source",
"url": "https://files.pythonhosted.org/...",
"md5_digest": "ghi789",
"digests": {"sha256": "jkl012"},
}
]
},
"urls": [] # Empty for this test
}
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data
result = await get_pypi_build_logs("test-package")
assert result["package_name"] == "test-package"
assert result["build_summary"]["wheel_count"] == 1
assert result["build_summary"]["source_count"] == 1
assert result["build_status"]["has_wheels"] is True
assert result["build_status"]["has_source"] is True
@pytest.mark.asyncio
async def test_analyze_source_only_package(self):
"""Test analysis of package with only source distribution."""
mock_package_data = {
"info": {"name": "source-only", "version": "1.0.0"},
"releases": {
"1.0.0": [
{
"filename": "source-only-1.0.0.tar.gz",
"packagetype": "sdist",
"size": 20000,
"upload_time_iso_8601": "2023-01-01T00:00:00Z",
"python_version": "source",
}
]
},
"urls": []
}
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data
result = await get_pypi_build_logs("source-only")
assert result["build_status"]["has_wheels"] is False
assert result["build_status"]["has_source"] is True
assert any("No wheel distributions" in warning for warning in result["issues"]["warnings"])
@pytest.mark.asyncio
async def test_package_not_found_build_logs(self):
"""Test build logs for non-existent package."""
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = PackageNotFoundError("nonexistent")
with pytest.raises(PackageNotFoundError):
await get_pypi_build_logs("nonexistent")
@pytest.mark.asyncio
async def test_platform_filtering(self):
"""Test platform-specific filtering of build logs."""
mock_package_data = {
"info": {"name": "multi-platform", "version": "1.0.0"},
"releases": {
"1.0.0": [
{
"filename": "multi_platform-1.0.0-py3-none-win_amd64.whl",
"packagetype": "bdist_wheel",
"size": 10000,
"python_version": "py3",
},
{
"filename": "multi_platform-1.0.0-py3-none-linux_x86_64.whl",
"packagetype": "bdist_wheel",
"size": 10000,
"python_version": "py3",
}
]
},
"urls": []
}
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.return_value = mock_package_data
# Test Windows filtering
result = await get_pypi_build_logs("multi-platform", platform="windows")
# Should only include Windows wheels
windows_wheels = [w for w in result["distributions"]["wheels"] if "win" in w.get("platform", "")]
assert len(windows_wheels) > 0
class TestWheelFilenameAnalysis:
"""Test wheel filename analysis."""
def test_universal_wheel_analysis(self):
"""Test analysis of universal wheel filename."""
result = _analyze_wheel_filename("mypackage-1.0.0-py2.py3-none-any.whl")
assert result["wheel_type"] == "universal"
assert result["platform"] == "any"
assert result["python_implementation"] == "universal"
def test_platform_specific_wheel_analysis(self):
"""Test analysis of platform-specific wheel filename."""
result = _analyze_wheel_filename("mypackage-1.0.0-cp38-cp38-win_amd64.whl")
assert result["wheel_type"] == "platform_specific"
assert result["platform"] == "windows"
assert result["python_implementation"] == "cpython"
assert result["architecture"] == "x86_64"
def test_linux_wheel_analysis(self):
"""Test analysis of Linux wheel filename."""
result = _analyze_wheel_filename("mypackage-1.0.0-cp39-cp39-linux_x86_64.whl")
assert result["platform"] == "linux"
assert result["architecture"] == "x86_64"
def test_macos_wheel_analysis(self):
"""Test analysis of macOS wheel filename."""
result = _analyze_wheel_filename("mypackage-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl")
assert result["platform"] == "macos"
assert result["architecture"] == "x86_64"
class TestBuildQualityAnalysis:
"""Test build quality analysis."""
def test_high_quality_build_analysis(self):
"""Test analysis of high-quality builds."""
distributions = {
"wheels": [
{"platform": "windows", "size_bytes": 1000000, "python_version": "py3"},
{"platform": "linux", "size_bytes": 1000000, "python_version": "py3"},
{"platform": "macos", "size_bytes": 1000000, "python_version": "py3"},
],
"source": [{"size_bytes": 500000}],
}
result = _analyze_build_quality(distributions, {})
assert result["health_status"] in ["good", "excellent"]
assert result["platform_coverage"] == 3
assert len(result["health_issues"]) == 0
def test_poor_quality_build_analysis(self):
"""Test analysis of poor-quality builds."""
distributions = {
"wheels": [], # No wheels
"source": [], # No source
}
result = _analyze_build_quality(distributions, {})
assert result["health_status"] == "poor"
assert len(result["health_issues"]) > 0
class TestUtilityFunctions:
"""Test utility functions."""
def test_generate_html_preview(self):
"""Test HTML preview generation."""
sections = {
"header": {
"name": "test-package",
"version": "1.0.0",
"summary": "A test package",
"author": "Test Author",
},
"metadata": {
"license": "MIT",
"home_page": "https://github.com/test/package",
"keywords": ["test"],
"classifiers": ["Development Status :: 4 - Beta"],
},
"description": {
"content": "Test description",
}
}
html = _generate_html_preview(sections)
assert "test-package" in html
assert "1.0.0" in html
assert "A test package" in html
assert "Test Author" in html
assert "MIT" in html
def test_generate_next_steps(self):
"""Test next steps generation."""
errors = ["Missing required field: name"]
warnings = ["Author email is recommended"]
suggestions = ["Consider adding keywords"]
steps = _generate_next_steps(errors, warnings, suggestions, False)
assert len(steps) > 0
assert any("Fix critical errors" in step for step in steps)
# Test with upload ready
steps_ready = _generate_next_steps([], warnings, suggestions, True)
assert any("Ready for upload" in step for step in steps_ready)
class TestErrorHandling:
"""Test error handling in workflow functions."""
@pytest.mark.asyncio
async def test_workflow_error_handling(self):
"""Test custom workflow error handling."""
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.side_effect = Exception("Unexpected error")
with pytest.raises(PyPIWorkflowError) as exc_info:
await validate_pypi_package_name("test-package")
assert "validate_name" in str(exc_info.value.operation)
@pytest.mark.asyncio
async def test_network_error_propagation(self):
"""Test that network errors are properly propagated."""
with patch("pypi_query_mcp.tools.workflow.PyPIClient") as mock_client:
mock_client.return_value.__aenter__.return_value.get_package_info.side_effect = NetworkError("Network down")
with pytest.raises(PyPIWorkflowError):
await get_pypi_build_logs("test-package")
if __name__ == "__main__":
pytest.main([__file__])