From d732d16a4adf0b304d658761fa627da701a48d8c Mon Sep 17 00:00:00 2001 From: tommaso-moro Date: Fri, 26 Jun 2026 11:00:07 +0100 Subject: [PATCH 1/2] Add fields param to search_code and get_file_contents Add an optional `fields` array parameter to the `search_code` and `get_file_contents` tools so callers can request only the fields they need, reducing tool response size and context usage. - search_code: filters each result item to the selected fields while preserving the total_count / incomplete_results wrapper. - get_file_contents: filters each directory entry when listing a directory; ignored for single-file responses. Adds shared filterFields / filterEachField helpers and per-tool field enums, plus unit tests and regenerated toolsnaps and docs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 + .../__toolsnaps__/get_file_contents.snap | 18 ++++++ pkg/github/__toolsnaps__/search_code.snap | 14 +++++ pkg/github/minimal_types.go | 53 ++++++++++++++++ pkg/github/repositories.go | 23 ++++++- pkg/github/repositories_test.go | 63 +++++++++++++++++++ pkg/github/search.go | 27 +++++++- pkg/github/search_test.go | 57 +++++++++++++++++ 8 files changed, 255 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 973b926d66..eed74761cb 100644 --- a/README.md +++ b/README.md @@ -1290,6 +1290,7 @@ The following sets of tools are available: - **get_file_contents** - Get file or directory contents - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'. (string[], optional) - `owner`: Repository owner (username or organization) (string, required) - `path`: Path to file/directory (string, optional) - `ref`: Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head` (string, optional) @@ -1364,6 +1365,7 @@ The following sets of tools are available: - **search_code** - Search code - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data. (string[], optional) - `order`: Sort order for results (string, optional) - `page`: Page number for pagination (min 1) (number, optional) - `perPage`: Results per page for pagination (min 1, max 100) (number, optional) diff --git a/pkg/github/__toolsnaps__/get_file_contents.snap b/pkg/github/__toolsnaps__/get_file_contents.snap index 94b7aeedac..5e021db5e5 100644 --- a/pkg/github/__toolsnaps__/get_file_contents.snap +++ b/pkg/github/__toolsnaps__/get_file_contents.snap @@ -6,6 +6,24 @@ "description": "Get the contents of a file or directory from a GitHub repository", "inputSchema": { "properties": { + "fields": { + "description": "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + "items": { + "enum": [ + "type", + "name", + "path", + "size", + "sha", + "url", + "git_url", + "html_url", + "download_url" + ], + "type": "string" + }, + "type": "array" + }, "owner": { "description": "Repository owner (username or organization)", "type": "string" diff --git a/pkg/github/__toolsnaps__/search_code.snap b/pkg/github/__toolsnaps__/search_code.snap index 79cbbf04e9..c4a8b11bee 100644 --- a/pkg/github/__toolsnaps__/search_code.snap +++ b/pkg/github/__toolsnaps__/search_code.snap @@ -6,6 +6,20 @@ "description": "Fast and precise code search across ALL GitHub repositories using GitHub's native search engine. Best for finding exact symbols, functions, classes, or specific code patterns.", "inputSchema": { "properties": { + "fields": { + "description": "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + "items": { + "enum": [ + "name", + "path", + "sha", + "repository", + "text_matches" + ], + "type": "string" + }, + "type": "array" + }, "order": { "description": "Sort order for results", "enum": [ diff --git a/pkg/github/minimal_types.go b/pkg/github/minimal_types.go index 256bdcb911..467fcda2c0 100644 --- a/pkg/github/minimal_types.go +++ b/pkg/github/minimal_types.go @@ -1,6 +1,8 @@ package github import ( + "bytes" + "encoding/json" "fmt" "net/url" "strconv" @@ -12,6 +14,57 @@ import ( "github.com/github/github-mcp-server/pkg/sanitize" ) +// codeSearchItemFieldEnum lists the selectable fields for search_code result +// items, matching the JSON field names of MinimalCodeResult. The repository and +// text_matches fields are the heaviest, so omitting them is the main lever for +// shrinking large result sets. +var codeSearchItemFieldEnum = []any{"name", "path", "sha", "repository", "text_matches"} + +// fileContentFieldEnum lists the selectable fields for get_file_contents +// directory listings, matching the JSON field names of +// github.RepositoryContent that appear for directory entries. Only applied when +// the requested path is a directory; ignored for single files. +var fileContentFieldEnum = []any{"type", "name", "path", "size", "sha", "url", "git_url", "html_url", "download_url"} + +// filterFields marshals v to a JSON object and returns a map containing only the +// requested fields. Fields that are unknown or absent from the JSON (for example +// empty values dropped via omitempty) are skipped. +func filterFields(v any, fields []string) (map[string]any, error) { + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.UseNumber() // preserve integer precision for fields such as IDs + var object map[string]any + if err := decoder.Decode(&object); err != nil { + return nil, err + } + + picked := make(map[string]any, len(fields)) + for _, field := range fields { + if value, ok := object[field]; ok { + picked[field] = value + } + } + return picked, nil +} + +// filterEachField applies filterFields to every item, returning a slice in which +// each element contains only the requested fields. +func filterEachField[T any](items []T, fields []string) ([]map[string]any, error) { + filtered := make([]map[string]any, 0, len(items)) + for _, item := range items { + picked, err := filterFields(item, fields) + if err != nil { + return nil, err + } + filtered = append(filtered, picked) + } + return filtered, nil +} + // MinimalUser is the output type for user and organization search results. type MinimalUser struct { Login string `json:"login"` diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 949a180081..8526b888bb 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -728,6 +728,14 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool Type: "string", Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", }, + "fields": { + Type: "array", + Description: "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: fileContentFieldEnum, + }, + }, }, Required: []string{"owner", "repo"}, }, @@ -760,6 +768,11 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return utils.NewToolResultError(err.Error()), nil, nil } + fields, err := OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } + client, err := deps.GetClient(ctx) if err != nil { return utils.NewToolResultError("failed to get GitHub client"), nil, nil @@ -883,7 +896,15 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return attachIFC(utils.NewToolResultResource(fmt.Sprintf("successfully downloaded binary file (SHA: %s)%s", fileSHA, successNote), result)), nil, nil } else if dirContent != nil { // file content or file SHA is nil which means it's a directory - r, err := json.Marshal(dirContent) + var payload any = dirContent + if len(fields) > 0 { + filtered, err := filterEachField(dirContent, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter directory contents", err), nil, nil + } + payload = filtered + } + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultError("failed to marshal response"), nil, nil } diff --git a/pkg/github/repositories_test.go b/pkg/github/repositories_test.go index e5531cc55b..5f93a9682f 100644 --- a/pkg/github/repositories_test.go +++ b/pkg/github/repositories_test.go @@ -480,6 +480,69 @@ func Test_GetFileContents(t *testing.T) { } } +func Test_GetFileContents_DirectoryFieldFiltering(t *testing.T) { + mockDirContent := []*github.RepositoryContent{ + { + Type: github.Ptr("file"), + Name: github.Ptr("README.md"), + Path: github.Ptr("README.md"), + SHA: github.Ptr("abc123"), + Size: github.Ptr(42), + URL: github.Ptr("https://api.github.com/repos/owner/repo/contents/README.md"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/README.md"), + DownloadURL: github.Ptr("https://raw.githubusercontent.com/owner/repo/main/README.md"), + }, + { + Type: github.Ptr("dir"), + Name: github.Ptr("src"), + Path: github.Ptr("src"), + SHA: github.Ptr("def456"), + HTMLURL: github.Ptr("https://github.com/owner/repo/tree/main/src"), + }, + } + + serverTool := GetFileContents(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetReposByOwnerByRepo: mockResponse(t, http.StatusOK, "{\"name\": \"repo\", \"default_branch\": \"main\"}"), + GetReposGitRefByOwnerByRepoByRef: mockResponse(t, http.StatusOK, "{\"ref\": \"refs/heads/main\", \"object\": {\"sha\": \"\"}}"), + GetReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{}).andThen( + mockResponse(t, http.StatusOK, mockDirContent), + ), + GetRawReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{"branch": "main"}).andThen( + mockResponse(t, http.StatusNotFound, nil), + ), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "owner": "owner", + "repo": "repo", + "path": "src/", + "fields": []any{"name", "type"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // Each directory entry is reduced to the requested fields only; heavier + // fields such as html_url and download_url are dropped. + var returned []map[string]any + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + require.Len(t, returned, len(mockDirContent)) + for _, entry := range returned { + require.Len(t, entry, 2) + assert.Contains(t, entry, "name") + assert.Contains(t, entry, "type") + } + + assert.NotContains(t, textContent.Text, "html_url") + assert.NotContains(t, textContent.Text, "download_url") +} + func Test_GetFileContents_IFC_InsidersMode(t *testing.T) { t.Parallel() diff --git a/pkg/github/search.go b/pkg/github/search.go index 23ccbd8387..7ffade4201 100644 --- a/pkg/github/search.go +++ b/pkg/github/search.go @@ -209,6 +209,14 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Description: "Sort order for results", Enum: []any{"asc", "desc"}, }, + "fields": { + Type: "array", + Description: "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: codeSearchItemFieldEnum, + }, + }, }, Required: []string{"query"}, } @@ -239,6 +247,10 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { if err != nil { return utils.NewToolResultError(err.Error()), nil, nil } + fields, err := OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } pagination, err := OptionalPaginationParams(args) if err != nil { return utils.NewToolResultError(err.Error()), nil, nil @@ -297,7 +309,20 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Items: minimalItems, } - r, err := json.Marshal(minimalResult) + var payload any = minimalResult + if len(fields) > 0 { + filteredItems, err := filterEachField(minimalItems, fields) + if err != nil { + return utils.NewToolResultErrorFromErr("failed to filter code search results", err), nil, nil + } + payload = map[string]any{ + "total_count": minimalResult.TotalCount, + "incomplete_results": minimalResult.IncompleteResults, + "items": filteredItems, + } + } + + r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } diff --git a/pkg/github/search_test.go b/pkg/github/search_test.go index 5ebf60842a..f0eeb882b8 100644 --- a/pkg/github/search_test.go +++ b/pkg/github/search_test.go @@ -509,6 +509,63 @@ func Test_SearchCode(t *testing.T) { } } +func Test_SearchCode_FieldFiltering(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("file1.go"), + Path: github.Ptr("path/to/file1.go"), + SHA: github.Ptr("abc123def456"), + Repository: &github.Repository{ + Name: github.Ptr("repo"), + FullName: github.Ptr("owner/repo"), + }, + TextMatches: []*github.TextMatch{ + {Fragment: github.Ptr("func main() {}")}, + }, + }, + }, + } + + serverTool := SearchCode(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetSearchCode: mockResponse(t, http.StatusOK, mockSearchResult), + })) + deps := BaseDeps{Client: client} + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + "fields": []any{"name", "path"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + textContent := getTextResult(t, result) + + // The wrapper metadata is preserved while each item is reduced to the + // requested fields only; the heavier repository and text_matches data is + // dropped. + var returned struct { + TotalCount int `json:"total_count"` + IncompleteResults bool `json:"incomplete_results"` + Items []map[string]any `json:"items"` + } + require.NoError(t, json.Unmarshal([]byte(textContent.Text), &returned)) + assert.Equal(t, 1, returned.TotalCount) + require.Len(t, returned.Items, 1) + require.Len(t, returned.Items[0], 2) + assert.Contains(t, returned.Items[0], "name") + assert.Contains(t, returned.Items[0], "path") + + assert.NotContains(t, textContent.Text, "repository") + assert.NotContains(t, textContent.Text, "text_matches") +} + func Test_SearchUsers(t *testing.T) { // Verify tool definition once serverTool := SearchUsers(translations.NullTranslationHelper) From b19ce6ae0dbe6a182c714ff7b24f83f929e11189 Mon Sep 17 00:00:00 2001 From: tommaso-moro Date: Tue, 30 Jun 2026 17:59:06 +0200 Subject: [PATCH 2/2] Gate fields param behind fields_param flag and add usage telemetry Register search_code and get_file_contents as two mutually exclusive variants gated by the new `fields_param` feature flag, following the existing dual-variant flag pattern: - The flag-enabled variant advertises the optional `fields` parameter and filters each result to the requested subset. It owns the `_ff_fields_param` toolsnap. - The Legacy* variant exposes the original schema with no `fields` parameter and never filters, acting as a kill switch when the flag is off. It owns the canonical toolsnap. Add best-effort, low-cardinality telemetry at each tool's filter point to measure adoption and realized savings: - `mcp.fields.tool_call` (increment) tagged by tool and whether the response was filtered. - `mcp.fields.bytes_full` / `bytes_sent` / `bytes_saved` (counters) tagged by tool, emitted only when a response was filtered. Tags are limited to `tool` and `filtered` to bound cardinality; repo, owner, user, query, and the requested field list are never tagged. The local server discards these via the noop metrics sink, while hosted deployments inject a real sink. Metrics accessors now fall back to a noop sink when no exporter is configured so emitting telemetry never panics. Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com> --- README.md | 2 - docs/feature-flags.md | 20 +++ .../__toolsnaps__/get_file_contents.snap | 18 --- .../get_file_contents_ff_fields_param.snap | 56 +++++++ pkg/github/__toolsnaps__/search_code.snap | 14 -- .../search_code_ff_fields_param.snap | 57 ++++++++ pkg/github/dependencies.go | 6 + pkg/github/feature_flags.go | 9 ++ pkg/github/fields_telemetry.go | 54 +++++++ pkg/github/fields_telemetry_test.go | 138 ++++++++++++++++++ pkg/github/repositories.go | 137 +++++++++++------ pkg/github/repositories_test.go | 75 +++++++++- pkg/github/search.go | 77 ++++++++-- pkg/github/search_test.go | 93 +++++++++++- pkg/github/tools.go | 2 + 15 files changed, 667 insertions(+), 91 deletions(-) create mode 100644 pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap create mode 100644 pkg/github/__toolsnaps__/search_code_ff_fields_param.snap create mode 100644 pkg/github/fields_telemetry.go create mode 100644 pkg/github/fields_telemetry_test.go diff --git a/README.md b/README.md index eed74761cb..973b926d66 100644 --- a/README.md +++ b/README.md @@ -1290,7 +1290,6 @@ The following sets of tools are available: - **get_file_contents** - Get file or directory contents - **Required OAuth Scopes**: `repo` - - `fields`: Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'. (string[], optional) - `owner`: Repository owner (username or organization) (string, required) - `path`: Path to file/directory (string, optional) - `ref`: Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head` (string, optional) @@ -1365,7 +1364,6 @@ The following sets of tools are available: - **search_code** - Search code - **Required OAuth Scopes**: `repo` - - `fields`: Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data. (string[], optional) - `order`: Sort order for results (string, optional) - `page`: Page number for pagination (min 1) (number, optional) - `perPage`: Results per page for pagination (min 1, max 100) (number, optional) diff --git a/docs/feature-flags.md b/docs/feature-flags.md index ce3c32e35f..2915c53f2b 100644 --- a/docs/feature-flags.md +++ b/docs/feature-flags.md @@ -334,4 +334,24 @@ runtime behavior (such as output formatting) won't appear here. - 'blocked_by' - the subject issue is blocked by the related issue. - 'blocking' - the subject issue blocks the related issue. (string, required) +### `fields_param` + +- **get_file_contents** - Get file or directory contents + - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'. (string[], optional) + - `owner`: Repository owner (username or organization) (string, required) + - `path`: Path to file/directory (string, optional) + - `ref`: Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head` (string, optional) + - `repo`: Repository name (string, required) + - `sha`: Accepts optional commit SHA. If specified, it will be used instead of ref (string, optional) + +- **search_code** - Search code + - **Required OAuth Scopes**: `repo` + - `fields`: Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data. (string[], optional) + - `order`: Sort order for results (string, optional) + - `page`: Page number for pagination (min 1) (number, optional) + - `perPage`: Results per page for pagination (min 1, max 100) (number, optional) + - `query`: Search query (GitHub code search REST). Implicit AND between terms; supports `OR`, `NOT`, and `"quoted phrase"` for exact match. Qualifiers: `repo:owner/repo`, `org:`, `user:`, `language:`, `path:dir` (prefix match), `filename:exact.ext`, `extension:`, `in:file`, `in:path`, `size:`, `is:archived`, `is:fork`. Max 256 chars. Examples: `WithContext language:go org:github`; `"package main" repo:o/r`; `func extension:go path:cmd repo:o/r`; `NOT TODO language:go repo:o/r`. (string, required) + - `sort`: Sort field ('indexed' only) (string, optional) + diff --git a/pkg/github/__toolsnaps__/get_file_contents.snap b/pkg/github/__toolsnaps__/get_file_contents.snap index 5e021db5e5..94b7aeedac 100644 --- a/pkg/github/__toolsnaps__/get_file_contents.snap +++ b/pkg/github/__toolsnaps__/get_file_contents.snap @@ -6,24 +6,6 @@ "description": "Get the contents of a file or directory from a GitHub repository", "inputSchema": { "properties": { - "fields": { - "description": "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", - "items": { - "enum": [ - "type", - "name", - "path", - "size", - "sha", - "url", - "git_url", - "html_url", - "download_url" - ], - "type": "string" - }, - "type": "array" - }, "owner": { "description": "Repository owner (username or organization)", "type": "string" diff --git a/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap b/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap new file mode 100644 index 0000000000..5e021db5e5 --- /dev/null +++ b/pkg/github/__toolsnaps__/get_file_contents_ff_fields_param.snap @@ -0,0 +1,56 @@ +{ + "annotations": { + "readOnlyHint": true, + "title": "Get file or directory contents" + }, + "description": "Get the contents of a file or directory from a GitHub repository", + "inputSchema": { + "properties": { + "fields": { + "description": "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + "items": { + "enum": [ + "type", + "name", + "path", + "size", + "sha", + "url", + "git_url", + "html_url", + "download_url" + ], + "type": "string" + }, + "type": "array" + }, + "owner": { + "description": "Repository owner (username or organization)", + "type": "string" + }, + "path": { + "default": "/", + "description": "Path to file/directory", + "type": "string" + }, + "ref": { + "description": "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", + "type": "string" + }, + "repo": { + "description": "Repository name", + "type": "string" + }, + "sha": { + "description": "Accepts optional commit SHA. If specified, it will be used instead of ref", + "type": "string" + } + }, + "required": [ + "owner", + "repo" + ], + "type": "object" + }, + "name": "get_file_contents" +} \ No newline at end of file diff --git a/pkg/github/__toolsnaps__/search_code.snap b/pkg/github/__toolsnaps__/search_code.snap index c4a8b11bee..79cbbf04e9 100644 --- a/pkg/github/__toolsnaps__/search_code.snap +++ b/pkg/github/__toolsnaps__/search_code.snap @@ -6,20 +6,6 @@ "description": "Fast and precise code search across ALL GitHub repositories using GitHub's native search engine. Best for finding exact symbols, functions, classes, or specific code patterns.", "inputSchema": { "properties": { - "fields": { - "description": "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", - "items": { - "enum": [ - "name", - "path", - "sha", - "repository", - "text_matches" - ], - "type": "string" - }, - "type": "array" - }, "order": { "description": "Sort order for results", "enum": [ diff --git a/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap b/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap new file mode 100644 index 0000000000..c4a8b11bee --- /dev/null +++ b/pkg/github/__toolsnaps__/search_code_ff_fields_param.snap @@ -0,0 +1,57 @@ +{ + "annotations": { + "readOnlyHint": true, + "title": "Search code" + }, + "description": "Fast and precise code search across ALL GitHub repositories using GitHub's native search engine. Best for finding exact symbols, functions, classes, or specific code patterns.", + "inputSchema": { + "properties": { + "fields": { + "description": "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + "items": { + "enum": [ + "name", + "path", + "sha", + "repository", + "text_matches" + ], + "type": "string" + }, + "type": "array" + }, + "order": { + "description": "Sort order for results", + "enum": [ + "asc", + "desc" + ], + "type": "string" + }, + "page": { + "description": "Page number for pagination (min 1)", + "minimum": 1, + "type": "number" + }, + "perPage": { + "description": "Results per page for pagination (min 1, max 100)", + "maximum": 100, + "minimum": 1, + "type": "number" + }, + "query": { + "description": "Search query (GitHub code search REST). Implicit AND between terms; supports `OR`, `NOT`, and `\"quoted phrase\"` for exact match. Qualifiers: `repo:owner/repo`, `org:`, `user:`, `language:`, `path:dir` (prefix match), `filename:exact.ext`, `extension:`, `in:file`, `in:path`, `size:`, `is:archived`, `is:fork`. Max 256 chars. Examples: `WithContext language:go org:github`; `\"package main\" repo:o/r`; `func extension:go path:cmd repo:o/r`; `NOT TODO language:go repo:o/r`.", + "type": "string" + }, + "sort": { + "description": "Sort field ('indexed' only)", + "type": "string" + } + }, + "required": [ + "query" + ], + "type": "object" + }, + "name": "search_code" +} \ No newline at end of file diff --git a/pkg/github/dependencies.go b/pkg/github/dependencies.go index 1141fbce89..bbc125ca68 100644 --- a/pkg/github/dependencies.go +++ b/pkg/github/dependencies.go @@ -193,6 +193,9 @@ func (d BaseDeps) Logger(_ context.Context) *slog.Logger { // Metrics implements ToolDependencies. func (d BaseDeps) Metrics(ctx context.Context) metrics.Metrics { + if d.Obsv == nil { + return metrics.NewNoopMetrics() + } return d.Obsv.Metrics(ctx) } @@ -423,6 +426,9 @@ func (d *RequestDeps) Logger(_ context.Context) *slog.Logger { // Metrics implements ToolDependencies. func (d *RequestDeps) Metrics(ctx context.Context) metrics.Metrics { + if d.obsv == nil { + return metrics.NewNoopMetrics() + } return d.obsv.Metrics(ctx) } diff --git a/pkg/github/feature_flags.go b/pkg/github/feature_flags.go index fdb113585a..3b6334a4ea 100644 --- a/pkg/github/feature_flags.go +++ b/pkg/github/feature_flags.go @@ -23,6 +23,14 @@ const FeatureFlagFileBlame = "file_blame" // unless explicitly opted in. const FeatureFlagIssueDependencies = "issue_dependencies" +// FeatureFlagFieldsParam is the feature flag name for the optional `fields` +// parameter on selected read tools (for example search_code and +// get_file_contents). When enabled, those tools advertise `fields` and filter +// each result to the requested subset, reducing response size. It is gated so +// the feature can be rolled out gradually and disabled as a kill switch without +// a redeploy. +const FeatureFlagFieldsParam = "fields_param" + // AllowedFeatureFlags is the allowlist of feature flags that can be enabled // by users via --features CLI flag or X-MCP-Features HTTP header. // Only flags in this list are accepted; unknown flags are silently ignored. @@ -35,6 +43,7 @@ var AllowedFeatureFlags = []string{ FeatureFlagPullRequestsGranular, FeatureFlagFileBlame, FeatureFlagIssueDependencies, + FeatureFlagFieldsParam, } // InsidersFeatureFlags is the list of feature flags that insiders mode enables. diff --git a/pkg/github/fields_telemetry.go b/pkg/github/fields_telemetry.go new file mode 100644 index 0000000000..324f463bec --- /dev/null +++ b/pkg/github/fields_telemetry.go @@ -0,0 +1,54 @@ +package github + +import ( + "context" + "strconv" +) + +// Metric names for the optional `fields` response-filtering feature. They let a +// dashboard answer two questions on real traffic: how often the model actually +// filters (adoption) and how many bytes that filtering removes (effectiveness). +// +// Cardinality is kept deliberately low: the only tags ever attached are `tool` +// (a small fixed set of tool names) and `filtered` (a boolean). Unbounded values +// such as repository, owner, user, the query, or the requested field list are +// never used as tags. +const ( + metricFieldsToolCall = "mcp.fields.tool_call" + metricFieldsBytesFull = "mcp.fields.bytes_full" + metricFieldsBytesSent = "mcp.fields.bytes_sent" + metricFieldsBytesSaved = "mcp.fields.bytes_saved" +) + +// recordFieldsUsage emits telemetry for a single call to a tool that supports +// the `fields` parameter. It is best-effort: the local server wires a no-op +// metrics sink, while hosted deployments inject a real sink. +// +// Every call increments mcp.fields.tool_call tagged by tool and whether the +// response was filtered, which yields the adoption rate (filtered / total). When +// the response was filtered, it also records the unfiltered (fullBytes) and +// returned (sentBytes) payload sizes plus their difference, which yields the +// realized savings. Byte counters are only emitted for filtered calls so that +// "percent saved" (bytes_saved / bytes_full) is computed over the population +// where filtering actually applied. +func recordFieldsUsage(ctx context.Context, deps ToolDependencies, tool string, filtered bool, fullBytes, sentBytes int) { + m := deps.Metrics(ctx) + if m == nil { + return + } + + m.Increment(metricFieldsToolCall, map[string]string{ + "tool": tool, + "filtered": strconv.FormatBool(filtered), + }) + + if !filtered { + return + } + + toolTag := map[string]string{"tool": tool} + saved := max(fullBytes-sentBytes, 0) + m.Counter(metricFieldsBytesFull, toolTag, int64(fullBytes)) + m.Counter(metricFieldsBytesSent, toolTag, int64(sentBytes)) + m.Counter(metricFieldsBytesSaved, toolTag, int64(saved)) +} diff --git a/pkg/github/fields_telemetry_test.go b/pkg/github/fields_telemetry_test.go new file mode 100644 index 0000000000..0ac3891edd --- /dev/null +++ b/pkg/github/fields_telemetry_test.go @@ -0,0 +1,138 @@ +package github + +import ( + "context" + "log/slog" + "sync" + "testing" + "time" + + "github.com/github/github-mcp-server/pkg/observability" + "github.com/github/github-mcp-server/pkg/observability/metrics" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// recordingMetrics is a metrics.Metrics implementation that captures emitted +// metrics so tests can assert on telemetry. It is safe for concurrent use. +type recordingMetrics struct { + mu sync.Mutex + increments []recordedMetric + counters []recordedMetric +} + +type recordedMetric struct { + key string + tags map[string]string + value int64 +} + +func (m *recordingMetrics) Increment(key string, tags map[string]string) { + m.mu.Lock() + defer m.mu.Unlock() + m.increments = append(m.increments, recordedMetric{key: key, tags: tags, value: 1}) +} + +func (m *recordingMetrics) Counter(key string, tags map[string]string, value int64) { + m.mu.Lock() + defer m.mu.Unlock() + m.counters = append(m.counters, recordedMetric{key: key, tags: tags, value: value}) +} + +func (m *recordingMetrics) Distribution(_ string, _ map[string]string, _ float64) {} +func (m *recordingMetrics) DistributionMs(_ string, _ map[string]string, _ time.Duration) {} +func (m *recordingMetrics) WithTags(_ map[string]string) metrics.Metrics { return m } + +// counter returns the recorded counter for the given key, or false if absent. +func (m *recordingMetrics) counter(key string) (recordedMetric, bool) { + m.mu.Lock() + defer m.mu.Unlock() + for _, c := range m.counters { + if c.key == key { + return c, true + } + } + return recordedMetric{}, false +} + +// increment returns the recorded increment for the given key, or false if absent. +func (m *recordingMetrics) increment(key string) (recordedMetric, bool) { + m.mu.Lock() + defer m.mu.Unlock() + for _, c := range m.increments { + if c.key == key { + return c, true + } + } + return recordedMetric{}, false +} + +// depsWithRecordingMetrics returns BaseDeps wired with a recording metrics sink +// plus the sink for assertions. +func depsWithRecordingMetrics(t *testing.T, base BaseDeps) (BaseDeps, *recordingMetrics) { + t.Helper() + rec := &recordingMetrics{} + exporters, err := observability.NewExporters(slog.New(slog.DiscardHandler), rec) + require.NoError(t, err) + base.Obsv = exporters + return base, rec +} + +func Test_recordFieldsUsage_Filtered(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + recordFieldsUsage(context.Background(), deps, "search_code", true, 100, 30) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "search_code", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + assert.Equal(t, int64(100), full.value) + assert.Equal(t, "search_code", full.tags["tool"]) + assert.NotContains(t, full.tags, "filtered") + + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + assert.Equal(t, int64(30), sent.value) + + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Equal(t, int64(70), saved.value) +} + +func Test_recordFieldsUsage_NotFiltered(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + recordFieldsUsage(context.Background(), deps, "search_code", false, 100, 100) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "false", call.tags["filtered"]) + + // No byte counters are emitted when the response was not filtered. + _, ok = rec.counter(metricFieldsBytesFull) + assert.False(t, ok) + _, ok = rec.counter(metricFieldsBytesSaved) + assert.False(t, ok) +} + +func Test_recordFieldsUsage_ClampsNegativeSavings(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{}) + + // sent larger than full should never yield negative savings. + recordFieldsUsage(context.Background(), deps, "get_file_contents", true, 10, 25) + + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Equal(t, int64(0), saved.value) +} + +func Test_recordFieldsUsage_NilExporterDoesNotPanic(t *testing.T) { + // BaseDeps with no Obsv falls back to a noop sink rather than panicking. + assert.NotPanics(t, func() { + recordFieldsUsage(context.Background(), BaseDeps{}, "search_code", true, 100, 30) + }) +} diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index 8526b888bb..2e0430ac05 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -693,8 +693,73 @@ func FetchRepoIsPrivate(ctx context.Context, client *github.Client, owner, repo return r.GetPrivate(), nil } -// GetFileContents creates a tool to get the contents of a file or directory from a GitHub repository. +// GetFileContents creates a tool to get the contents of a file or directory from +// a GitHub repository. It is the FeatureFlagFieldsParam-enabled variant: it +// advertises the optional `fields` parameter and filters directory listings to +// the requested subset. Both this and LegacyGetFileContents register under the +// tool name "get_file_contents"; exactly one is active for any given request +// thanks to mutually exclusive FeatureFlagEnable / FeatureFlagDisable annotations. func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool { + st := getFileContentsTool(t, true) + st.FeatureFlagEnable = FeatureFlagFieldsParam + return st +} + +// LegacyGetFileContents is the FeatureFlagFieldsParam-disabled variant of +// get_file_contents. It exposes the original schema (no `fields` parameter) and +// never filters directory listings, so it acts as the kill switch when the flag +// is off. It owns the canonical get_file_contents.snap; the flag-enabled variant +// owns get_file_contents_ff_.snap. Delete this function when the flag is +// removed. +func LegacyGetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool { + st := getFileContentsTool(t, false) + st.FeatureFlagDisable = []string{FeatureFlagFieldsParam} + return st +} + +// getFileContentsTool builds the get_file_contents tool. When includeFields is +// true the tool advertises the optional `fields` parameter, filters directory +// listings to the requested subset, and emits fields telemetry. When false it is +// the original tool with no fields parameter and no filtering. +func getFileContentsTool(t translations.TranslationHelperFunc, includeFields bool) inventory.ServerTool { + schema := &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "owner": { + Type: "string", + Description: "Repository owner (username or organization)", + }, + "repo": { + Type: "string", + Description: "Repository name", + }, + "path": { + Type: "string", + Description: "Path to file/directory", + Default: json.RawMessage(`"/"`), + }, + "ref": { + Type: "string", + Description: "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", + }, + "sha": { + Type: "string", + Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", + }, + }, + Required: []string{"owner", "repo"}, + } + if includeFields { + schema.Properties["fields"] = &jsonschema.Schema{ + Type: "array", + Description: "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: fileContentFieldEnum, + }, + } + } + return NewTool( ToolsetMetadataRepos, mcp.Tool{ @@ -704,41 +769,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool Title: t("TOOL_GET_FILE_CONTENTS_USER_TITLE", "Get file or directory contents"), ReadOnlyHint: true, }, - InputSchema: &jsonschema.Schema{ - Type: "object", - Properties: map[string]*jsonschema.Schema{ - "owner": { - Type: "string", - Description: "Repository owner (username or organization)", - }, - "repo": { - Type: "string", - Description: "Repository name", - }, - "path": { - Type: "string", - Description: "Path to file/directory", - Default: json.RawMessage(`"/"`), - }, - "ref": { - Type: "string", - Description: "Accepts optional git refs such as `refs/tags/{tag}`, `refs/heads/{branch}` or `refs/pull/{pr_number}/head`", - }, - "sha": { - Type: "string", - Description: "Accepts optional commit SHA. If specified, it will be used instead of ref", - }, - "fields": { - Type: "array", - Description: "Subset of fields to return for each entry when the path is a directory. If omitted, all fields are returned. Ignored when the path is a single file. Use this to reduce response size when listing directories and you only need specific fields, e.g. just 'name' and 'type'.", - Items: &jsonschema.Schema{ - Type: "string", - Enum: fileContentFieldEnum, - }, - }, - }, - Required: []string{"owner", "repo"}, - }, + InputSchema: schema, }, []scopes.Scope{scopes.Repo}, func(ctx context.Context, deps ToolDependencies, _ *mcp.CallToolRequest, args map[string]any) (*mcp.CallToolResult, any, error) { @@ -768,9 +799,12 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return utils.NewToolResultError(err.Error()), nil, nil } - fields, err := OptionalStringArrayParam(args, "fields") - if err != nil { - return utils.NewToolResultError(err.Error()), nil, nil + var fields []string + if includeFields { + fields, err = OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } } client, err := deps.GetClient(ctx) @@ -896,18 +930,23 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool return attachIFC(utils.NewToolResultResource(fmt.Sprintf("successfully downloaded binary file (SHA: %s)%s", fileSHA, successNote), result)), nil, nil } else if dirContent != nil { // file content or file SHA is nil which means it's a directory + filtered := false var payload any = dirContent - if len(fields) > 0 { - filtered, err := filterEachField(dirContent, fields) + if includeFields && len(fields) > 0 { + filteredEntries, err := filterEachField(dirContent, fields) if err != nil { return utils.NewToolResultErrorFromErr("failed to filter directory contents", err), nil, nil } - payload = filtered + payload = filteredEntries + filtered = true } r, err := json.Marshal(payload) if err != nil { return utils.NewToolResultError("failed to marshal response"), nil, nil } + if includeFields { + recordDirContentsFieldsUsage(ctx, deps, dirContent, filtered, len(r)) + } return attachIFC(utils.NewToolResultText(string(r))), nil, nil } @@ -916,6 +955,20 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool ) } +// recordDirContentsFieldsUsage emits fields telemetry for a get_file_contents +// directory listing. sentBytes is the size of the payload actually returned. +// When the listing was filtered, the unfiltered size is computed from the full +// directory content so the realized savings can be measured. +func recordDirContentsFieldsUsage(ctx context.Context, deps ToolDependencies, full []*github.RepositoryContent, filtered bool, sentBytes int) { + fullBytes := sentBytes + if filtered { + if data, err := json.Marshal(full); err == nil { + fullBytes = len(data) + } + } + recordFieldsUsage(ctx, deps, "get_file_contents", filtered, fullBytes, sentBytes) +} + // ForkRepository creates a tool to fork a repository. func ForkRepository(t translations.TranslationHelperFunc) inventory.ServerTool { return NewTool( diff --git a/pkg/github/repositories_test.go b/pkg/github/repositories_test.go index 5f93a9682f..136aa93696 100644 --- a/pkg/github/repositories_test.go +++ b/pkg/github/repositories_test.go @@ -27,7 +27,11 @@ func Test_GetFileContents(t *testing.T) { // Verify tool definition once serverTool := GetFileContents(translations.NullTranslationHelper) tool := serverTool.Tool - require.NoError(t, toolsnaps.Test(tool.Name, tool)) + // GetFileContents is the FeatureFlagFieldsParam-enabled variant; it owns the + // _ff_ snapshot. The canonical get_file_contents.snap is owned by + // LegacyGetFileContents (see Test_LegacyGetFileContents_Definition). + require.NoError(t, toolsnaps.Test(tool.Name+"_ff_"+FeatureFlagFieldsParam, tool)) + require.Equal(t, FeatureFlagFieldsParam, serverTool.FeatureFlagEnable) schema, ok := tool.InputSchema.(*jsonschema.Schema) require.True(t, ok, "InputSchema should be *jsonschema.Schema") @@ -39,6 +43,7 @@ func Test_GetFileContents(t *testing.T) { assert.Contains(t, schema.Properties, "path") assert.Contains(t, schema.Properties, "ref") assert.Contains(t, schema.Properties, "sha") + assert.Contains(t, schema.Properties, "fields") assert.ElementsMatch(t, schema.Required, []string{"owner", "repo"}) // Mock response for raw content @@ -543,6 +548,74 @@ func Test_GetFileContents_DirectoryFieldFiltering(t *testing.T) { assert.NotContains(t, textContent.Text, "download_url") } +func Test_LegacyGetFileContents_Definition(t *testing.T) { + serverTool := LegacyGetFileContents(translations.NullTranslationHelper) + tool := serverTool.Tool + // LegacyGetFileContents is the FeatureFlagFieldsParam-disabled variant and + // owns the canonical get_file_contents.snap (no `fields`). + require.NoError(t, toolsnaps.Test(tool.Name, tool)) + require.Equal(t, []string{FeatureFlagFieldsParam}, serverTool.FeatureFlagDisable) + + assert.Equal(t, "get_file_contents", tool.Name) + schema, ok := tool.InputSchema.(*jsonschema.Schema) + require.True(t, ok, "InputSchema should be *jsonschema.Schema") + assert.NotContains(t, schema.Properties, "fields") +} + +func Test_GetFileContents_DirectoryFieldsTelemetry(t *testing.T) { + mockDirContent := []*github.RepositoryContent{ + { + Type: github.Ptr("file"), + Name: github.Ptr("README.md"), + Path: github.Ptr("README.md"), + SHA: github.Ptr("abc123"), + Size: github.Ptr(42), + URL: github.Ptr("https://api.github.com/repos/owner/repo/contents/README.md"), + HTMLURL: github.Ptr("https://github.com/owner/repo/blob/main/README.md"), + DownloadURL: github.Ptr("https://raw.githubusercontent.com/owner/repo/main/README.md"), + }, + } + + serverTool := GetFileContents(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetReposByOwnerByRepo: mockResponse(t, http.StatusOK, "{\"name\": \"repo\", \"default_branch\": \"main\"}"), + GetReposGitRefByOwnerByRepoByRef: mockResponse(t, http.StatusOK, "{\"ref\": \"refs/heads/main\", \"object\": {\"sha\": \"\"}}"), + GetReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{}).andThen( + mockResponse(t, http.StatusOK, mockDirContent), + ), + GetRawReposContentsByOwnerByRepoByPath: expectQueryParams(t, map[string]string{"branch": "main"}).andThen( + mockResponse(t, http.StatusNotFound, nil), + ), + })) + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "owner": "owner", + "repo": "repo", + "path": "src/", + "fields": []any{"name", "type"}, + }) + + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "get_file_contents", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Greater(t, full.value, sent.value, "filtering should remove bytes") + assert.Equal(t, full.value-sent.value, saved.value) +} + func Test_GetFileContents_IFC_InsidersMode(t *testing.T) { t.Parallel() diff --git a/pkg/github/search.go b/pkg/github/search.go index 7ffade4201..e84f261873 100644 --- a/pkg/github/search.go +++ b/pkg/github/search.go @@ -191,8 +191,34 @@ func attachSearchRepositoriesIFCLabel(ctx context.Context, deps ToolDependencies setIFCLabel(callResult, ifc.LabelSearchIssues(visibilities)) } -// SearchCode creates a tool to search for code across GitHub repositories. +// SearchCode creates a tool to search for code across GitHub repositories. It is +// the FeatureFlagFieldsParam-enabled variant: it advertises the optional +// `fields` parameter and filters each result to the requested subset. Both this +// and LegacySearchCode register under the tool name "search_code"; exactly one +// is active for any given request thanks to mutually exclusive +// FeatureFlagEnable / FeatureFlagDisable annotations. func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { + st := searchCodeTool(t, true) + st.FeatureFlagEnable = FeatureFlagFieldsParam + return st +} + +// LegacySearchCode is the FeatureFlagFieldsParam-disabled variant of +// search_code. It exposes the original schema (no `fields` parameter) and never +// filters results, so it acts as the kill switch when the flag is off. It owns +// the canonical search_code.snap; the flag-enabled variant owns +// search_code_ff_.snap. Delete this function when the flag is removed. +func LegacySearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { + st := searchCodeTool(t, false) + st.FeatureFlagDisable = []string{FeatureFlagFieldsParam} + return st +} + +// searchCodeTool builds the search_code tool. When includeFields is true the +// tool advertises the optional `fields` parameter, filters each result to the +// requested subset, and emits fields telemetry. When false it is the original +// tool with no fields parameter and no filtering. +func searchCodeTool(t translations.TranslationHelperFunc, includeFields bool) inventory.ServerTool { schema := &jsonschema.Schema{ Type: "object", Properties: map[string]*jsonschema.Schema{ @@ -209,17 +235,19 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Description: "Sort order for results", Enum: []any{"asc", "desc"}, }, - "fields": { - Type: "array", - Description: "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", - Items: &jsonschema.Schema{ - Type: "string", - Enum: codeSearchItemFieldEnum, - }, - }, }, Required: []string{"query"}, } + if includeFields { + schema.Properties["fields"] = &jsonschema.Schema{ + Type: "array", + Description: "Subset of fields to return for each code search result. If omitted, all fields are returned. Use this to reduce response size when you only need specific fields; omitting 'repository' and 'text_matches' in particular drops the largest per-result data.", + Items: &jsonschema.Schema{ + Type: "string", + Enum: codeSearchItemFieldEnum, + }, + } + } WithPagination(schema) return NewTool( @@ -247,9 +275,12 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { if err != nil { return utils.NewToolResultError(err.Error()), nil, nil } - fields, err := OptionalStringArrayParam(args, "fields") - if err != nil { - return utils.NewToolResultError(err.Error()), nil, nil + var fields []string + if includeFields { + fields, err = OptionalStringArrayParam(args, "fields") + if err != nil { + return utils.NewToolResultError(err.Error()), nil, nil + } } pagination, err := OptionalPaginationParams(args) if err != nil { @@ -309,8 +340,9 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { Items: minimalItems, } + filtered := false var payload any = minimalResult - if len(fields) > 0 { + if includeFields && len(fields) > 0 { filteredItems, err := filterEachField(minimalItems, fields) if err != nil { return utils.NewToolResultErrorFromErr("failed to filter code search results", err), nil, nil @@ -320,6 +352,7 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { "incomplete_results": minimalResult.IncompleteResults, "items": filteredItems, } + filtered = true } r, err := json.Marshal(payload) @@ -327,6 +360,10 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { return utils.NewToolResultErrorFromErr("failed to marshal response", err), nil, nil } + if includeFields { + recordSearchCodeFieldsUsage(ctx, deps, minimalResult, filtered, len(r)) + } + callResult := utils.NewToolResultText(string(r)) // Code search spans repositories; the IFC label is the conservative // join across every matched repository's visibility, read directly @@ -343,6 +380,20 @@ func SearchCode(t translations.TranslationHelperFunc) inventory.ServerTool { ) } +// recordSearchCodeFieldsUsage emits fields telemetry for a search_code call. +// sentBytes is the size of the payload actually returned. When the response was +// filtered, the unfiltered size is computed from the full minimal result so the +// realized savings can be measured. +func recordSearchCodeFieldsUsage(ctx context.Context, deps ToolDependencies, full *MinimalCodeSearchResult, filtered bool, sentBytes int) { + fullBytes := sentBytes + if filtered { + if data, err := json.Marshal(full); err == nil { + fullBytes = len(data) + } + } + recordFieldsUsage(ctx, deps, "search_code", filtered, fullBytes, sentBytes) +} + func userOrOrgHandler(ctx context.Context, accountType string, deps ToolDependencies, args map[string]any) (*mcp.CallToolResult, any, error) { query, err := RequiredParam[string](args, "query") if err != nil { diff --git a/pkg/github/search_test.go b/pkg/github/search_test.go index f0eeb882b8..6217046e96 100644 --- a/pkg/github/search_test.go +++ b/pkg/github/search_test.go @@ -342,7 +342,11 @@ func Test_SearchCode(t *testing.T) { // Verify tool definition once serverTool := SearchCode(translations.NullTranslationHelper) tool := serverTool.Tool - require.NoError(t, toolsnaps.Test(tool.Name, tool)) + // SearchCode is the FeatureFlagFieldsParam-enabled variant; it owns the + // _ff_ snapshot. The canonical search_code.snap is owned by + // LegacySearchCode (see Test_LegacySearchCode_Definition). + require.NoError(t, toolsnaps.Test(tool.Name+"_ff_"+FeatureFlagFieldsParam, tool)) + require.Equal(t, FeatureFlagFieldsParam, serverTool.FeatureFlagEnable) assert.Equal(t, "search_code", tool.Name) assert.NotEmpty(t, tool.Description) @@ -354,6 +358,7 @@ func Test_SearchCode(t *testing.T) { assert.Contains(t, schema.Properties, "order") assert.Contains(t, schema.Properties, "perPage") assert.Contains(t, schema.Properties, "page") + assert.Contains(t, schema.Properties, "fields") assert.ElementsMatch(t, schema.Required, []string{"query"}) // Setup mock search results @@ -566,6 +571,92 @@ func Test_SearchCode_FieldFiltering(t *testing.T) { assert.NotContains(t, textContent.Text, "text_matches") } +func Test_LegacySearchCode_Definition(t *testing.T) { + serverTool := LegacySearchCode(translations.NullTranslationHelper) + tool := serverTool.Tool + // LegacySearchCode is the FeatureFlagFieldsParam-disabled variant and owns + // the canonical search_code.snap (no `fields`). + require.NoError(t, toolsnaps.Test(tool.Name, tool)) + require.Equal(t, []string{FeatureFlagFieldsParam}, serverTool.FeatureFlagDisable) + + assert.Equal(t, "search_code", tool.Name) + schema, ok := tool.InputSchema.(*jsonschema.Schema) + require.True(t, ok, "InputSchema should be *jsonschema.Schema") + assert.NotContains(t, schema.Properties, "fields") +} + +func Test_SearchCode_FieldsTelemetry(t *testing.T) { + mockSearchResult := &github.CodeSearchResult{ + Total: github.Ptr(1), + IncompleteResults: github.Ptr(false), + CodeResults: []*github.CodeResult{ + { + Name: github.Ptr("file1.go"), + Path: github.Ptr("path/to/file1.go"), + SHA: github.Ptr("abc123def456"), + Repository: &github.Repository{ + Name: github.Ptr("repo"), + FullName: github.Ptr("owner/repo"), + }, + TextMatches: []*github.TextMatch{ + {Fragment: github.Ptr("func main() {}")}, + }, + }, + }, + } + + serverTool := SearchCode(translations.NullTranslationHelper) + client := mustNewGHClient(t, MockHTTPClientWithHandlers(map[string]http.HandlerFunc{ + GetSearchCode: mockResponse(t, http.StatusOK, mockSearchResult), + })) + + t.Run("filtered call records savings", func(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + "fields": []any{"name", "path"}, + }) + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "search_code", call.tags["tool"]) + assert.Equal(t, "true", call.tags["filtered"]) + + full, ok := rec.counter(metricFieldsBytesFull) + require.True(t, ok) + sent, ok := rec.counter(metricFieldsBytesSent) + require.True(t, ok) + saved, ok := rec.counter(metricFieldsBytesSaved) + require.True(t, ok) + assert.Greater(t, full.value, sent.value, "filtering should remove bytes") + assert.Equal(t, full.value-sent.value, saved.value) + }) + + t.Run("unfiltered call records adoption only", func(t *testing.T) { + deps, rec := depsWithRecordingMetrics(t, BaseDeps{Client: client}) + handler := serverTool.Handler(deps) + + request := createMCPRequest(map[string]any{ + "query": "fmt.Println language:go", + }) + result, err := handler(ContextWithDeps(context.Background(), deps), &request) + require.NoError(t, err) + require.False(t, result.IsError) + + call, ok := rec.increment(metricFieldsToolCall) + require.True(t, ok) + assert.Equal(t, "false", call.tags["filtered"]) + + _, ok = rec.counter(metricFieldsBytesFull) + assert.False(t, ok, "no byte counters when not filtered") + }) +} + func Test_SearchUsers(t *testing.T) { // Verify tool definition once serverTool := SearchUsers(translations.NullTranslationHelper) diff --git a/pkg/github/tools.go b/pkg/github/tools.go index e361a6cfa4..24419ed24f 100644 --- a/pkg/github/tools.go +++ b/pkg/github/tools.go @@ -182,8 +182,10 @@ func AllTools(t translations.TranslationHelperFunc) []inventory.ServerTool { // Repository tools SearchRepositories(t), GetFileContents(t), + LegacyGetFileContents(t), ListCommits(t), SearchCode(t), + LegacySearchCode(t), SearchCommits(t), GetCommit(t), GetFileBlame(t),