Spaces:
Running
Running
Commit
·
b7c3fee
1
Parent(s):
1f9050d
make it adhere to chatgpt spec
Browse files- src/index.ts +41 -128
- src/utils.ts +97 -0
src/index.ts
CHANGED
|
@@ -2,71 +2,62 @@
|
|
| 2 |
|
| 3 |
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
| 4 |
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
|
|
|
| 5 |
import {
|
| 6 |
CallToolRequestSchema,
|
| 7 |
ListToolsRequestSchema,
|
| 8 |
Tool,
|
| 9 |
} from "@modelcontextprotocol/sdk/types.js";
|
| 10 |
-
import { z } from "zod";
|
| 11 |
import { createServer, IncomingMessage, ServerResponse } from "node:http";
|
| 12 |
import { URL } from "node:url";
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
| 20 |
});
|
| 21 |
|
| 22 |
-
const
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
dataset: z.string().describe("The dataset ID (e.g., 'ibm/duorc')"),
|
| 29 |
-
config: z.string().optional().describe("The dataset configuration name (optional)"),
|
| 30 |
});
|
| 31 |
|
| 32 |
const TOOL_DEFS: Tool[] = [
|
| 33 |
{
|
| 34 |
-
name: "
|
| 35 |
description: "Search for datasets on Hugging Face Hub",
|
| 36 |
inputSchema: {
|
| 37 |
type: "object",
|
| 38 |
properties: {
|
| 39 |
-
query: {
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
},
|
| 45 |
-
{
|
| 46 |
-
name: "get_dataset_info",
|
| 47 |
-
description: "Get detailed information about a specific dataset including splits, configs, and features",
|
| 48 |
-
inputSchema: {
|
| 49 |
-
type: "object",
|
| 50 |
-
properties: {
|
| 51 |
-
dataset: { type: "string", description: "The dataset ID (e.g., 'ibm/duorc')" },
|
| 52 |
-
config: { type: "string", description: "The dataset configuration name (optional)" },
|
| 53 |
},
|
| 54 |
-
required: ["
|
| 55 |
},
|
| 56 |
},
|
| 57 |
{
|
| 58 |
-
name: "
|
| 59 |
-
description:
|
|
|
|
| 60 |
inputSchema: {
|
| 61 |
type: "object",
|
| 62 |
properties: {
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
},
|
| 69 |
-
required: ["
|
| 70 |
},
|
| 71 |
},
|
| 72 |
];
|
|
@@ -83,98 +74,20 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
| 83 |
|
| 84 |
try {
|
| 85 |
switch (name) {
|
| 86 |
-
case "
|
| 87 |
-
const validatedArgs =
|
| 88 |
-
const { query
|
| 89 |
-
|
| 90 |
-
const url = new URL("https://huggingface.co/api/datasets");
|
| 91 |
-
const params = new URLSearchParams({ limit: String(limit) });
|
| 92 |
-
if (query) {
|
| 93 |
-
params.set("search", query);
|
| 94 |
-
}
|
| 95 |
-
url.search = params.toString();
|
| 96 |
|
| 97 |
-
const
|
| 98 |
-
|
| 99 |
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
| 100 |
-
}
|
| 101 |
-
const datasets = await response.json();
|
| 102 |
-
|
| 103 |
-
const results = datasets.map((dataset: any) => ({
|
| 104 |
-
id: dataset.id,
|
| 105 |
-
title: dataset.id,
|
| 106 |
-
url: `https://huggingface.co/datasets/${dataset.id}`,
|
| 107 |
-
description: dataset.description || "",
|
| 108 |
-
author: dataset.author,
|
| 109 |
-
downloads: dataset.downloads || 0,
|
| 110 |
-
likes: dataset.likes || 0,
|
| 111 |
-
tags: dataset.tags || [],
|
| 112 |
-
}));
|
| 113 |
-
|
| 114 |
-
return {
|
| 115 |
-
content: [{
|
| 116 |
-
type: "text",
|
| 117 |
-
text: JSON.stringify({ results }, null, 2)
|
| 118 |
-
}]
|
| 119 |
-
};
|
| 120 |
-
}
|
| 121 |
-
|
| 122 |
-
case "get_dataset_info": {
|
| 123 |
-
const validatedArgs = GetDatasetInfoSchema.parse(args);
|
| 124 |
-
const { dataset, config } = validatedArgs;
|
| 125 |
-
|
| 126 |
-
const url = new URL("https://datasets-server.huggingface.co/info");
|
| 127 |
-
const params = new URLSearchParams({ dataset });
|
| 128 |
-
if (config) {
|
| 129 |
-
params.set("config", config);
|
| 130 |
-
}
|
| 131 |
-
url.search = params.toString();
|
| 132 |
-
|
| 133 |
-
const response = await fetch(url);
|
| 134 |
-
if (!response.ok) {
|
| 135 |
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
| 136 |
-
}
|
| 137 |
-
const result = await response.json();
|
| 138 |
-
|
| 139 |
-
const info = {
|
| 140 |
-
id: dataset,
|
| 141 |
-
title: dataset,
|
| 142 |
-
text: JSON.stringify(result.dataset_info, null, 2),
|
| 143 |
-
url: `https://huggingface.co/datasets/${dataset}`,
|
| 144 |
-
metadata: {
|
| 145 |
-
source: "huggingface_datasets_server",
|
| 146 |
-
config: config,
|
| 147 |
-
partial: result.partial,
|
| 148 |
-
}
|
| 149 |
-
};
|
| 150 |
-
|
| 151 |
-
return {
|
| 152 |
-
content: [{
|
| 153 |
-
type: "text",
|
| 154 |
-
text: JSON.stringify(info, null, 2)
|
| 155 |
-
}]
|
| 156 |
-
};
|
| 157 |
}
|
| 158 |
|
| 159 |
-
case "
|
| 160 |
-
const validatedArgs =
|
| 161 |
-
const {
|
| 162 |
-
|
| 163 |
-
const url = new URL("https://datasets-server.huggingface.co/rows");
|
| 164 |
-
url.search = new URLSearchParams({
|
| 165 |
-
dataset,
|
| 166 |
-
config,
|
| 167 |
-
split,
|
| 168 |
-
offset: String(offset),
|
| 169 |
-
length: String(length),
|
| 170 |
-
}).toString();
|
| 171 |
|
| 172 |
-
const
|
| 173 |
-
|
| 174 |
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
| 175 |
-
}
|
| 176 |
-
const result = await response.json();
|
| 177 |
-
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
| 178 |
}
|
| 179 |
|
| 180 |
default:
|
|
|
|
| 2 |
|
| 3 |
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
| 4 |
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
| 5 |
+
import { z } from "zod";
|
| 6 |
import {
|
| 7 |
CallToolRequestSchema,
|
| 8 |
ListToolsRequestSchema,
|
| 9 |
Tool,
|
| 10 |
} from "@modelcontextprotocol/sdk/types.js";
|
|
|
|
| 11 |
import { createServer, IncomingMessage, ServerResponse } from "node:http";
|
| 12 |
import { URL } from "node:url";
|
| 13 |
+
import { searchDatasets, fetchDatasetAggregate, textContent } from "./utils.ts";
|
| 14 |
+
|
| 15 |
+
/** Free-text query field accepted by the `search` tool. */
const searchQueryField = z
  .string()
  .describe(
    "Search query for datasets (e.g., 'nlp sentiment analysis', 'computer vision', 'question answering')",
  );

/** Validates the arguments of a `search` tool call: an object with a single string `query`. */
const SearchSchema = z.object({ query: searchQueryField });
|
| 22 |
|
| 23 |
+
/** Dataset identifier field accepted by the `fetch` tool. */
const fetchIdField = z
  .string()
  .describe(
    "Unique dataset identifier (e.g., 'squad', 'imdb', 'cfahlgren1/hub-stats')",
  );

/** Validates the arguments of a `fetch` tool call: an object with a single string `id`. */
const FetchSchema = z.object({ id: fetchIdField });
|
| 30 |
|
| 31 |
/**
 * Builds a tool definition whose input is an object with exactly one
 * required string property.
 *
 * @param name - Tool name.
 * @param description - Human-readable tool description.
 * @param argName - Name of the single required string argument.
 * @param argDescription - Description attached to that argument.
 */
function stringArgTool(
  name: string,
  description: string,
  argName: string,
  argDescription: string,
): Tool {
  return {
    name,
    description,
    inputSchema: {
      type: "object",
      properties: {
        [argName]: { type: "string", description: argDescription },
      },
      required: [argName],
    },
  };
}

/** Tool definitions exposed by this server: `search` (by query) and `fetch` (by dataset id). */
const TOOL_DEFS: Tool[] = [
  stringArgTool(
    "search",
    "Search for datasets on Hugging Face Hub",
    "query",
    "Search query for datasets (e.g., 'nlp sentiment analysis', 'computer vision', 'question answering')",
  ),
  stringArgTool(
    "fetch",
    "Retrieve full information and sample data for a specific dataset",
    "id",
    "Unique dataset identifier (e.g., 'squad', 'imdb', 'cfahlgren1/hub-stats')",
  ),
];
|
|
|
|
| 74 |
|
| 75 |
try {
|
| 76 |
switch (name) {
|
| 77 |
+
case "search": {
|
| 78 |
+
const validatedArgs = SearchSchema.parse(args);
|
| 79 |
+
const { query } = validatedArgs;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
+
const results = await searchDatasets(query);
|
| 82 |
+
return textContent({ results });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
+
case "fetch": {
|
| 86 |
+
const validatedArgs = FetchSchema.parse(args);
|
| 87 |
+
const { id: datasetId } = validatedArgs;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
const fetchResult = await fetchDatasetAggregate(datasetId);
|
| 90 |
+
return textContent(fetchResult);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
}
|
| 92 |
|
| 93 |
default:
|
src/utils.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { URL } from "node:url";
|
| 2 |
+
|
| 3 |
+
/**
 * Creates a URL for `base` whose query string is built from `params`.
 *
 * The query component is assigned wholesale, so a query string already
 * present in `base` is replaced rather than merged.
 *
 * @param base - Absolute URL string accepted by the `URL` constructor.
 * @param params - Query parameter names mapped to their string values.
 * @returns A new `URL` carrying the encoded parameters.
 */
export function buildUrl(base: string, params: Record<string, string>): URL {
  const target = new URL(base);
  const query = new URLSearchParams(params);
  target.search = query.toString();
  return target;
}
|
| 8 |
+
|
| 9 |
+
/**
 * Requests `url` with the global `fetch` and returns the parsed JSON body.
 *
 * @param url - Fully built request URL.
 * @param context - Short label prefixed to the error message on failure.
 * @returns The parsed JSON payload, typed as `T` (not validated at runtime).
 * @throws Error with message `"<context>: HTTP <status>"` when the response
 *   status is not OK. Network and JSON-parse errors propagate unchanged.
 */
export async function fetchJson<T = any>(url: URL, context: string): Promise<T> {
  const response = await fetch(url);
  if (!response.ok) {
    // The error body is never read; cancel it so the underlying connection is
    // released immediately instead of lingering until garbage collection.
    // A failure to cancel must not mask the HTTP error reported below.
    await response.body?.cancel().catch(() => undefined);
    throw new Error(`${context}: HTTP ${response.status}`);
  }
  return (await response.json()) as T;
}
|
| 16 |
+
|
| 17 |
+
/**
 * Picks a default config and split for a dataset: the first config listed by
 * the datasets-server `/configs` endpoint, and the first key of
 * `dataset_info.splits` returned by `/info` for that config.
 *
 * @param datasetId - Dataset identifier sent as the `dataset` query parameter.
 * @returns The selected `config` and `split` names.
 * @throws Error when no config or no split is found, or when either request
 *   fails (errors from `fetchJson` propagate).
 */
export async function getDefaultConfigAndSplit(datasetId: string): Promise<{ config: string; split: string }> {
  const configListing = await fetchJson<any>(
    buildUrl("https://datasets-server.huggingface.co/configs", { dataset: datasetId }),
    "Failed to get configs",
  );
  const config: string | undefined = configListing.configs?.[0]?.config;
  if (!config) {
    throw new Error("No configs found for dataset");
  }

  const info = await fetchJson<any>(
    buildUrl("https://datasets-server.huggingface.co/info", { dataset: datasetId, config }),
    "Failed to get dataset info",
  );
  // Only the first split name is needed; a missing `splits` map counts as empty.
  const [split] = Object.keys(info.dataset_info?.splits || {});
  if (!split) {
    throw new Error("No splits found for dataset");
  }

  return { config, split };
}
|
| 40 |
+
|
| 41 |
+
/**
 * Searches the Hugging Face Hub datasets API and returns one entry per hit.
 *
 * The request asks for at most 20 results. Each entry uses the dataset id as
 * both `id` and `title`, and links to the dataset's page on huggingface.co.
 *
 * @param query - Search text sent as the `search` query parameter.
 * @returns Entries with `id`, `title` and `url`, in the order the API returned them.
 * @throws Error when the request fails (errors from `fetchJson` propagate).
 */
export async function searchDatasets(query: string): Promise<Array<{ id: string; title: string; url: string }>> {
  const endpoint = buildUrl("https://huggingface.co/api/datasets", { search: query, limit: "20" });
  const hits = await fetchJson<any[]>(endpoint, "Dataset search failed");
  return hits.map(({ id }: any) => ({
    id,
    title: id,
    url: `https://huggingface.co/datasets/${id}`,
  }));
}
|
| 50 |
+
|
| 51 |
+
/**
 * Best-effort loader for the "Sample data" section of a fetch result.
 * Never rejects: any failure yields the "Not available" placeholder.
 */
async function loadSampleSection(rowsUrl: URL, config: string, split: string): Promise<string> {
  const unavailable = "\n\nSample data: Not available";
  try {
    const rowsResponse = await fetch(rowsUrl);
    if (!rowsResponse.ok) {
      // The body is not read on this path; cancel it so the connection is released.
      await rowsResponse.body?.cancel();
      return unavailable;
    }
    const rowsResult = await rowsResponse.json();
    return `\n\nSample data (${config}/${split}):\n${JSON.stringify(rowsResult.rows || [], null, 2)}`;
  } catch {
    // Sample rows are optional; the dataset info is still returned without them.
    return unavailable;
  }
}

/**
 * Builds a single document describing a dataset: its `dataset_info` from the
 * datasets-server `/info` endpoint followed by up to 50 sample rows from `/rows`.
 *
 * The config and split are chosen by `getDefaultConfigAndSplit`. A failure to
 * load sample rows is tolerated and reported as "Not available" in the text.
 *
 * @param datasetId - Dataset identifier, used as both `id` and `title`.
 * @returns An object with `id`, `title`, the combined `text`, and the dataset's
 *   page `url` on huggingface.co.
 * @throws Error when the config/split lookup or the info request fails.
 */
export async function fetchDatasetAggregate(datasetId: string): Promise<{
  id: string;
  title: string;
  text: string;
  url: string;
}> {
  const { config, split } = await getDefaultConfigAndSplit(datasetId);

  // NOTE(review): getDefaultConfigAndSplit already requests /info for this
  // dataset and config; reusing that response would save a round trip.
  const infoUrl = buildUrl("https://datasets-server.huggingface.co/info", {
    dataset: datasetId,
    config,
  });
  const rowsUrl = buildUrl("https://datasets-server.huggingface.co/rows", {
    dataset: datasetId,
    config,
    split,
    offset: "0",
    length: "50",
  });

  // The two requests are independent, so issue them concurrently. The rows
  // loader never rejects, so only an info failure can reject here.
  const [infoResult, sampleData] = await Promise.all([
    fetchJson<any>(infoUrl, "Failed to get dataset info"),
    loadSampleSection(rowsUrl, config, split),
  ]);

  return {
    id: datasetId,
    title: datasetId,
    text: `Dataset Information:\n${JSON.stringify(infoResult.dataset_info, null, 2)}${sampleData}`,
    url: `https://huggingface.co/datasets/${datasetId}`,
  };
}
|
| 93 |
+
|
| 94 |
+
/**
 * Wraps a payload as a tool result holding one text item with the payload's
 * compact JSON serialization.
 *
 * @param payload - Any value; serialized with `JSON.stringify`.
 * @returns `{ content: [{ type: "text", text }] }` where `text` is always a string.
 */
export function textContent(payload: unknown) {
  // JSON.stringify yields undefined for undefined, functions and symbols;
  // fall back to "null" so the text item always carries a string.
  const serialized: string | undefined = JSON.stringify(payload);
  return { content: [{ type: "text" as const, text: serialized ?? "null" }] };
}
|
| 97 |
+
|