Skip to content

Commit

Permalink
Improve prompts, expand tests, and squash some Docker layers
Browse files Browse the repository at this point in the history
  • Loading branch information
tcpipuk authored Feb 23, 2025
1 parent 76353b8 commit ae9f08e
Show file tree
Hide file tree
Showing 11 changed files with 377 additions and 104 deletions.
10 changes: 10 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
.dockerignore
.git
.github
.gitignore
.venv
.ruff_cache
.pytest_cache
__pycache__
docs
Dockerfile
6 changes: 5 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ concurrency:
cancel-in-progress: true

on:
push:
pull_request:
push:
paths-ignore:
- "**/*.md"
- "docs/**"
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -34,6 +37,7 @@ jobs:

- name: Run tests and output results
run: |
set -o pipefail
docker run --rm -e BUILD_ENV=dev mcp-server:test | tee pytest_output.txt
echo '```' >> $GITHUB_STEP_SUMMARY
cat pytest_output.txt >> $GITHUB_STEP_SUMMARY
Expand Down
12 changes: 7 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Build stage using uv with a frozen lockfile and dependency caching
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim AS uv
WORKDIR /app
ARG BUILD_ENV=prod

# Enable bytecode compilation and copy mode
ENV UV_COMPILE_BYTECODE=1 \
Expand All @@ -16,6 +17,11 @@ COPY . .
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen ${BUILD_ENV:+"--dev"} --no-editable

# Add the source code and install dependencies
COPY . .
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen ${BUILD_ENV:+"--dev"} --no-editable

# Prepare runtime image
FROM python:3.13-slim-bookworm AS runtime
WORKDIR /app
Expand All @@ -39,11 +45,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /root/.cache

# Copy only necessary files from build stage
COPY --from=uv --chown=app:app /app/mcp_server ./mcp_server/
COPY --from=uv --chown=app:app /app/.venv ./.venv/
COPY --from=uv --chown=app:app /app/pyproject.toml ./
COPY --from=uv --chown=app:app /app/pytest.ini ./
COPY --from=uv --chown=app:app /app/tests ./tests/
COPY --from=uv --chown=app:app /app/ .

# Switch to non-root user and set up environment
USER app
Expand Down
4 changes: 2 additions & 2 deletions mcp_server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
"""

from .__main__ import main
from .server import serve
from .server import MCPServer

__all__ = ["main", "serve"]
__all__ = ["MCPServer", "main"]
12 changes: 10 additions & 2 deletions mcp_server/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
from asyncio import CancelledError, run as asyncio_run
from contextlib import suppress as contextlib_suppress
from os import environ as os_environ
from pathlib import Path

from .server import serve
from yaml import safe_load as yaml_safe_load

from .server import MCPServer
from .tools import tool_python, tool_web


def main() -> None:
Expand All @@ -28,8 +32,12 @@ def main() -> None:
if args.user_agent:
os_environ["USER_AGENT"] = args.user_agent

config = yaml_safe_load(Path("tools.yaml").read_text(encoding="utf-8"))
config["tools"]["python"]["method"] = tool_python
config["tools"]["web"]["method"] = tool_web
server = MCPServer(config)
with contextlib_suppress(KeyboardInterrupt, CancelledError):
asyncio_run(serve())
asyncio_run(server.serve())


if __name__ == "__main__":
Expand Down
160 changes: 96 additions & 64 deletions mcp_server/server.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
"""Core server implementation for the MCP fetch service.
"""Core MCPServer implementation for the MCP fetch service.
Implements a Model Context Protocol server that fetches and processes web content.
Supports both standard I/O and Server-Sent Events (SSE) transport modes, with
content extraction powered by trafilatura for efficient web scraping.
Provides a generic MCPServer class for serving MCP requests. Allows drop-in tool support by mapping
tool functions to configuration loaded from an external YAML file.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from os import getenv as os_getenv
from typing import TYPE_CHECKING
from pathlib import Path
from typing import TYPE_CHECKING, Any

from mcp.server import Server
from mcp.server import Server as BaseMCPServer
from mcp.server.sse import SseServerTransport
from mcp.server.stdio import stdio_server
from mcp.shared.exceptions import McpError
Expand All @@ -19,76 +20,107 @@
from starlette.routing import Mount, Route
from uvicorn import Config as UvicornConfig, Server as UvicornServer

from .tools import TOOLS, tool_python, tool_web

if TYPE_CHECKING:
from starlette.requests import Request
from starlette.responses import Response


async def serve() -> None:
"""Run the fetch MCP server."""
server = Server("mcp-fetch")

@server.list_tools()
async def list_tools() -> list[Tool]:
# Default path for tool configuration YAML file
DEFAULT_TOOL_CONFIG_PATH = Path(__file__).parent / "tools.yaml"


@dataclass(slots=True)
class MCPServer:
"""Define a generic MCP server class with drop-in tool support."""

config: dict[str, Any]
server: BaseMCPServer = field(init=False)
server_name: str = field(default="mcp-server")
tools: list[Tool] = field(default_factory=list)

def __post_init__(self) -> None:
    """Initialise the MCPServer from its configuration mapping.

    Reads an optional server name from ``config["server"]["name"]``, creates the
    underlying MCP server instance, builds the advertised tool list from
    ``config["tools"]`` and registers the list/call handlers on the server.

    Raises:
        KeyError: If the configuration has no "tools" section.
    """
    # An empty "server:" key in YAML loads as None (not {}), so the default passed
    # to .get() would not apply; fall back to an empty mapping explicitly.
    configured_name = (self.config.get("server") or {}).get("name")
    if configured_name:
        self.server_name = configured_name
    # Create MCP server instance
    self.server = BaseMCPServer(self.server_name)
    # Build the tool list; the "method" entry (the tool's callable) is excluded
    # from the keyword arguments passed to Tool
    self.tools = [
        Tool(name=name, **{k: v for k, v in tool.items() if k != "method"})
        for name, tool in self.config["tools"].items()
    ]
    # Register the tool listing/calling methods
    self.server.list_tools()(self.list_tools)
    self.server.call_tool()(self.call_tool)

async def list_tools(self) -> list[Tool]:
"""Return a list of available tools.
Returns:
A list of Tool objects representing the available tools.
"""
return TOOLS

@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
"""Call the specified tool with provided arguments.
return self.tools

Args:
name: The name of the tool to call.
arguments: A dictionary of arguments for the tool.
async def call_tool(self, name: str, arguments: dict) -> list[TextContent]:
"""Call the tool specified by name with provided arguments.
Returns:
A list of TextContent objects containing the fetched results.
A list of TextContent objects containing the tool's result
Raises:
McpError: If the tool is unknown or fails to execute.
McpError: If the tool is unknown or fails to execute
"""
for tool_name, tool_func in {"python": tool_python, "web": tool_web}.items():
if name == tool_name:
try:
return [TextContent(type="text", text=await tool_func(**arguments))]
except McpError as err:
raise McpError(ErrorData(code=INVALID_PARAMS, message=str(err))) from err
# Otherwise, the tool is unknown
raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown tool: {name}"))

options = server.create_initialization_options()
sse_host, sse_port = os_getenv("SSE_HOST"), os_getenv("SSE_PORT")
if sse_host and sse_port:
sse = SseServerTransport("/messages/")

async def handle_sse(request: Request) -> Response | None:
"""Handle the Server-Sent Events (SSE) connection.
Args:
request: The incoming HTTP request.
"""
async with sse.connect_sse(request.scope, request.receive, request._send) as streams: # noqa: SLF001
await server.run(streams[0], streams[1], options, raise_exceptions=True)

starlette_app = Starlette(
debug=True,
routes=[
Route("/sse", endpoint=handle_sse),
Mount("/messages/", app=sse.handle_post_message),
],
)

config = UvicornConfig(
app=starlette_app, host=sse_host, port=int(sse_port), log_level="info"
)
server_instance = UvicornServer(config)
await server_instance.serve()
else:
async with stdio_server() as (read_stream, write_stream):
await server.run(read_stream, write_stream, options, raise_exceptions=True)
if name not in self.config["tools"]:
raise McpError(
ErrorData(
code=INVALID_PARAMS,
message=f"Tool '{name}' isn't available on this server anymore",
)
)
if "method" not in self.config["tools"][name]:
raise McpError(
ErrorData(
code=INVALID_PARAMS,
message=(
f"Tool '{name}' has no registered method: inform the user that their MCP "
"server requires configuration to provide a function for this tool."
),
)
)
try:
result = await self.config["tools"][name]["method"](**arguments)
return [TextContent(type="text", text=result)]
except McpError as err:
raise McpError(ErrorData(code=INVALID_PARAMS, message=str(err))) from err

async def serve(self) -> None:
    """Serve MCP requests over SSE when configured, otherwise over stdio.

    SSE mode is used only when both the SSE_HOST and SSE_PORT environment
    variables are set to non-empty values.
    """
    init_options = self.server.create_initialization_options()
    host = os_getenv("SSE_HOST")
    port = os_getenv("SSE_PORT")

    if not (host and port):
        # No SSE endpoint configured: talk MCP over stdin/stdout
        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(read_stream, write_stream, init_options, raise_exceptions=True)
        return

    transport = SseServerTransport("/messages/")

    async def _handle_sse(request: Request) -> Response | None:
        """Run the MCP server over one incoming SSE connection."""
        async with transport.connect_sse(
            request.scope,
            request.receive,
            request._send,  # noqa: SLF001
        ) as streams:
            incoming, outgoing = streams[0], streams[1]
            await self.server.run(incoming, outgoing, init_options, raise_exceptions=True)

    routes = [
        Route("/sse", endpoint=_handle_sse),
        Mount("/messages/", app=transport.handle_post_message),
    ]
    app = Starlette(debug=True, routes=routes)
    uvicorn_config = UvicornConfig(app=app, host=host, port=int(port), log_level="info")
    await UvicornServer(uvicorn_config).serve()
59 changes: 29 additions & 30 deletions mcp_server/tools/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,67 +12,66 @@
Tool(
name="web",
description=(
"Use to access the internet when up-to-date information may help. You can navigate "
"documentation, or fetch code and data from the web, so use it whenever fresh "
"information from the internet could potentially improve the accuracy of your answer "
"to the user."
"Your knowledge is out of date and potentially flawed. This tool lets you access and "
"process web content to enhance your responses. Use this tool to:\n"
"- Check current documentation when answering questions\n"
"- Fetch example code or data to demonstrate solutions\n"
"- Navigate through documentation using extracted links\n"
"- Verify information before making recommendations"
),
inputSchema={
"type": "object",
"properties": {
"url": {
"type": "string",
"description": (
"The URL to access. This can be any public web address, an API GET "
"endpoint, or even a location of a text/code file on GitHub, etc."
"URL to access - could be a web page, API endpoint, or a file on GitHub, "
"etc."
),
},
"mode": {
"type": "string",
"enum": ["markdown", "raw", "links"],
"default": "markdown",
"description": (
"Determines how to process the content:\n"
"'markdown' formats a HTML page into efficient markdown, removing headers, "
"navigation, ads, etc, so ideal for normal web pages;\n"
"'raw' returns the unprocessed content, if you need to see raw HTML, or "
"code, XML, JSON, etc.;\n"
"'links' extracts a list of hyperlinks (with anchor text) from a HTML "
"page, which can help you understand site structure or navigate "
"documentation."
"How to process the content:\n"
"'markdown': Convert HTML to clean markdown (best for reading)\n"
"'raw': Get unprocessed content (for non-HTML such as code, JSON, etc)\n"
"'links': Extract hyperlinks from a webpage with anchor text, which can be "
"combined with the markdown mode for navigation around a website, e.g. to "
"locate details in a repository or documentation site."
),
},
"max_length": {
"type": "integer",
"default": 0,
"description": (
"Limits the number of characters returned. A value of 0 means no limit. "
"You could use this if you're only interested in the start of a file, but "
"it's better to err on the side of having more context."
),
"description": "Limit response length in characters (zero means no limit)",
},
},
"required": ["url", "mode"],
"required": ["url"],
},
),
Tool(
name="python",
description=(
"Execute or lint Python code in a resource-limited sandbox.\n"
"It has internet access, with aiodns, aiohttp, bs4, numpy, pandas, and requests "
"installed, so you can now test and solve a number of problems without needing to "
"directly calculate it yourself.\n"
"Depending on your input parameters, this tool either runs the code or lints with "
"Ruff, so you can test code before running, or use Ruff to help debugging if you get "
"errors. The user can see the code you've submitted and the raw returned response, but "
"it's good etiquette to briefly summarise after using this tool what you asked for and "
"got back."
"Execute code in a Python 3.13 sandbox to demonstrate concepts and calculate results. "
"Instead of writing example code for users to run, use this tool directly to:\n"
"- Show pandas/numpy operations with real data\n"
"- Calculate results that would be tedious manually\n"
"- Demonstrate and verify working code examples\n\n"
"Includes: numpy, pandas, requests, bs4, aiodns, aiohttp. Can either run code or lint "
"with Ruff. The user can see your code and its output, but the output is not well "
"formatted, so it's good practice to briefly explain what you did and what the results "
"show.\n\n"
"When showing code or output to users, format it appropriately in markdown:\n"
"- Use ``` backticks for code and console output\n"
"- Use tables, lists or other markdown for structured data like pandas output"
),
inputSchema={
"type": "object",
"properties": {
"code": {"type": "string", "description": "Python code to use"},
"timeout": {
"time_limit": {
"type": "integer",
"default": 10,
"description": "Timeout in seconds for execution (ignored when linting)",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies = [
"aiohttp>=3.11.12",
"beautifulsoup4>=4.13.3",
"mcp>=1.2.1",
"pyyaml>=6.0.2",
"trafilatura>=2.0.0",
"uvicorn>=0.34.0",
]
Expand Down
Loading

0 comments on commit ae9f08e

Please sign in to comment.