How to Test MCP Servers¶
Test your Model Context Protocol (MCP) servers by running evals against them.
How It Works¶
pytest-skill-engineering uses the official MCP Python SDK to talk to MCP servers. For each configured server, it:
- Connects to the server via stdio, SSE, or Streamable HTTP transport
- Discovers tools via MCP protocol
- Routes tool calls from the LLM to the server
- Returns results back to the LLM
Transports¶
pytest-skill-engineering supports all three MCP transports.
stdio (default)¶
Launches a local subprocess and communicates via stdin/stdout:
import pytest
from pytest_skill_engineering import MCPServer, Wait
@pytest.fixture(scope="module")
def banking_server():
return MCPServer(
command=["python", "-m", "my_banking_mcp"],
wait=Wait.for_tools(["get_balance"]),
)
SSE¶
Connects to a remote server using Server-Sent Events:
@pytest.fixture(scope="module")
def remote_server():
return MCPServer(
transport="sse",
url="http://localhost:8000/sse",
)
Streamable HTTP¶
Connects to a remote server using the Streamable HTTP transport (recommended for production):
@pytest.fixture(scope="module")
def remote_server():
return MCPServer(
transport="streamable-http",
url="http://localhost:8000/mcp",
)
Authentication Headers¶
Pass headers for authenticated endpoints. Headers support ${VAR} expansion:
@pytest.fixture(scope="module")
def authenticated_server():
return MCPServer(
transport="streamable-http",
url="https://mcp.example.com/mcp",
headers={"Authorization": "Bearer ${MCP_API_TOKEN}"},
)
Configuration Options¶
# stdio transport
MCPServer(
command=["python", "-m", "server"], # Command to start server
args=["--debug"], # Additional arguments
env={"API_KEY": "xxx"}, # Environment variables
cwd="/path/to/server", # Working directory
wait=Wait.for_tools(["tool1"]), # Wait condition
)
# Remote transport (SSE or streamable-http)
MCPServer(
transport="streamable-http", # "sse" or "streamable-http"
url="http://localhost:8000/mcp", # Server URL
headers={"Authorization": "Bearer ${TOKEN}"}, # Optional headers
wait=Wait.for_tools(["tool1"]), # Wait condition
)
| Option | Transport | Description | Default |
|---|---|---|---|
| `transport` | All | `"stdio"`, `"sse"`, or `"streamable-http"` | `"stdio"` |
| `command` | stdio | Command to start the MCP server | Required for stdio |
| `args` | stdio | Additional command-line arguments | `[]` |
| `url` | sse, streamable-http | Server endpoint URL | Required for remote |
| `headers` | sse, streamable-http | HTTP headers (supports `${VAR}` expansion) | `{}` |
| `env` | stdio | Environment variables (supports `${VAR}` expansion) | `{}` |
| `cwd` | stdio | Working directory | Current directory |
| `wait` | All | Wait condition for server startup | `Wait.ready()` |
Wait Strategies¶
Control how pytest-skill-engineering waits for the server to be ready.
- Wait.ready() — Wait briefly for the process to start (default).
- Wait.for_tools() — Wait until specific tools are available (recommended), e.g. Wait.for_tools(["get_balance"]).
- Wait.for_log() — Wait for a specific log pattern (a regular expression).
All wait strategies accept a timeout.
NPX-based Servers¶
@pytest.fixture(scope="module")
def filesystem_server():
return MCPServer(
command=["npx", "-y", "@modelcontextprotocol/server-filesystem"],
args=["/tmp/workspace"],
wait=Wait.for_tools(["read_file", "write_file"]),
)
Environment Variables¶
import os
@pytest.fixture(scope="module")
def api_server():
return MCPServer(
command=["python", "-m", "my_api_server"],
env={
"API_BASE_URL": "https://api.example.com",
"API_KEY": os.environ["MY_API_KEY"],
},
)
Complete Example¶
import pytest
from pytest_skill_engineering import MCPServer, Wait
from pytest_skill_engineering.copilot import CopilotEval
@pytest.fixture(scope="module")
def banking_server():
return MCPServer(
command=["python", "-m", "my_banking_mcp"],
wait=Wait.for_tools(["get_balance", "transfer"]),
)
@pytest.fixture
def banking_agent():
return CopilotEval(
name="banking",
instructions="You are a banking assistant.",
)
async def test_balance_query(copilot_eval, banking_agent):
result = await copilot_eval(banking_agent, "What's my checking balance?")
assert result.success
assert result.tool_was_called("get_balance")
Multiple Servers¶
Combine multiple MCP servers in a single eval:
@pytest.fixture(scope="module")
def banking_server():
return MCPServer(
command=["python", "-m", "banking_mcp"],
wait=Wait.for_tools(["get_balance"]),
)
@pytest.fixture(scope="module")
def calendar_server():
return MCPServer(
command=["python", "-m", "calendar_mcp"],
wait=Wait.for_tools(["create_event", "list_events"]),
)
@pytest.fixture
def assistant_agent():
return CopilotEval(
name="assistant",
instructions="You can check balances and manage calendar.",
)
Filtering Tools¶
Use allowed_tools on the eval (CopilotEval in the example below) to limit which tools are exposed to the LLM. This reduces token usage and keeps the eval focused on the tools under test.
@pytest.fixture
def balance_agent():
# banking_server has 16 tools, but this test only needs 2
return CopilotEval(
name="balance-checker",
instructions="You check account balances.",
allowed_tools=["get_balance", "get_all_balances"],
)
MCP Server Prompts¶
MCP servers can bundle prompt templates alongside their tools — reusable message templates that surface in VS Code as slash commands (e.g. /mcp.servername.code_review). pytest-skill-engineering can discover and test these.
Use MCPServerProcess directly to interact with the MCP protocol:
import pytest
from pytest_skill_engineering import MCPPrompt, MCPServer
from pytest_skill_engineering.copilot import CopilotEval
from pytest_skill_engineering.execution.servers import MCPServerProcess
@pytest.fixture(scope="module")
async def server_process(banking_server):
"""Start the server and expose the raw MCP session."""
proc = MCPServerProcess(banking_server)
await proc.start()
yield proc
await proc.stop()
async def test_prompts_are_discoverable(server_process):
"""The server exposes the expected prompt templates."""
prompts = await server_process.list_prompts()
names = [p.name for p in prompts]
assert "balance_summary" in names
async def test_balance_summary_prompt(copilot_eval, server_process):
"""The balance_summary prompt produces a coherent LLM response."""
# Render the template (like VS Code does when user invokes the slash command)
messages = await server_process.get_prompt(
"balance_summary",
{"account_type": "checking"},
)
assert messages, "Prompt returned no messages"
# Run the rendered prompt through the LLM
agent = CopilotEval(
name="balance-summary",
instructions="You are a banking assistant.",
)
result = await copilot_eval(agent, messages[0]["content"])
assert result.success
What get_prompt Returns¶
get_prompt returns a list of {"role": str, "content": str} dicts — the assembled messages the MCP server produces for that template. Use messages[0]["content"] as the test prompt, or assert on the rendered content directly:
messages = await server_process.get_prompt("code_review", {"code": "def hello(): ..."})
# Structural assertion: prompt was rendered
assert len(messages) > 0
assert "hello" in messages[0]["content"] # Template filled argument in
Troubleshooting¶
Server Doesn't Start¶
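Check that the command works standalone by running it directly in a terminal (for example, python -m my_banking_mcp) and confirming that it starts without errors.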
Tools Not Discovered¶
Use Wait.for_tools() to wait for the tools you expect, and check the server logs if they never become available.
Timeout During Startup¶
Increase the timeout on the wait strategy; all wait strategies accept a timeout.