llm-fusion-mcp/test_comprehensive.py
Initial commit: LLM Fusion MCP Server
- Unified access to 4 major LLM providers (Gemini, OpenAI, Anthropic, Grok)
- Real-time streaming support across all providers
- Multimodal capabilities (text, images, audio)
- Intelligent document processing with smart chunking
- Production-ready with health monitoring and error handling
- Full OpenAI ecosystem integration (Assistants, DALL-E, Whisper)
- Vector embeddings and semantic similarity
- Session-based API key management
- Built with FastMCP and modern Python tooling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

#!/usr/bin/env python3
"""Comprehensive test for all MCP server features."""

import os
import sys

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()


def test_embeddings():
    """Test text embeddings functionality."""
    print("Testing text embeddings...")
    print("=" * 50)

    client = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    response = client.embeddings.create(
        input="The quick brown fox jumps over the lazy dog",
        model="gemini-embedding-001",
    )
    print(f"Embedding dimensions: {len(response.data[0].embedding)}")
    print(f"First 5 values: {response.data[0].embedding[:5]}")
    print("✓ Embeddings working!")


def test_function_calling():
    """Test function calling functionality."""
    print("\nTesting function calling...")
    print("=" * 50)

    client = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. Chicago, IL",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    response = client.chat.completions.create(
        model="gemini-2.0-flash",
        messages=[{"role": "user", "content": "What's the weather like in Chicago today?"}],
        tools=tools,
        tool_choice="auto",
    )

    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        print(f"Function called: {tool_call.function.name}")
        print(f"Arguments: {tool_call.function.arguments}")
        print("✓ Function calling working!")
    else:
        print("No function calls detected")


def test_thinking_mode():
    """Test thinking mode with reasoning effort."""
    print("\nTesting thinking mode...")
    print("=" * 50)

    client = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    response = client.chat.completions.create(
        model="gemini-2.5-flash",
        reasoning_effort="low",
        messages=[
            {"role": "user", "content": "What is 45-78+5x13? Double check your work."}
        ],
    )
    print("Response:")
    print(response.choices[0].message.content[:200] + "...")
    print("✓ Thinking mode working!")


def test_cached_content():
    """Test Google-specific passthrough options via extra_body.

    Named for cached content, but without a real cache ID this exercises the
    nested extra_body format with thinking_config instead.
    """
    print("\nTesting cached content...")
    print("=" * 50)

    client = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    # Note: testing actual caching would need a real cached_content ID.
    try:
        stream = client.chat.completions.create(
            model="gemini-2.5-pro",
            messages=[{"role": "user", "content": "Summarize the content"}],
            stream=True,
            stream_options={"include_usage": True},
            extra_body={
                "extra_body": {
                    "google": {
                        "thinking_config": {"enabled": True}
                    }
                }
            },
        )
        text = ""
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                text += chunk.choices[0].delta.content
        print(f"Generated text length: {len(text)}")
        print("✓ Extra body features working!")
    except Exception as e:
        print(f"Note: Cached content test needs real cache ID: {e}")


def test_structured_outputs():
    """Test structured outputs with Pydantic models."""
    print("\nTesting structured outputs...")
    print("=" * 50)

    client = OpenAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    try:
        from pydantic import BaseModel

        class PersonInfo(BaseModel):
            name: str
            age: int
            occupation: str
            location: str

        response = client.beta.chat.completions.parse(
            model="gemini-2.0-flash",
            messages=[
                {"role": "user", "content": "Generate info for a fictional software engineer in San Francisco"}
            ],
            response_format=PersonInfo,
        )
        parsed = response.choices[0].message.parsed
        print(f"Generated person: {parsed.model_dump_json(indent=2)}")
        print("✓ Structured outputs working!")
    except ImportError:
        print("Pydantic not available for structured outputs test")
    except Exception as e:
        print(f"Structured outputs test failed: {e}")


if __name__ == "__main__":
    if not os.getenv("GOOGLE_API_KEY"):
        print("Please set GOOGLE_API_KEY environment variable")
        sys.exit(1)

    print("Comprehensive Gemini MCP Server Test")
    print("=" * 70)

    test_embeddings()
    test_function_calling()
    test_thinking_mode()
    test_cached_content()
    test_structured_outputs()

    print("\n" + "=" * 70)
    print("All tests completed!")