Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(context): Add Prompt Caching to Code Context (CODY-4807) #6878

Merged
merged 15 commits into from
Feb 1, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ log:
value: 1; mode=block
- name: strict-transport-security
value: max-age=31536000; includeSubDomains; preload
headersSize: 1436
headersSize: 1296
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -431,15 +431,25 @@ log:
value: null
url: https://sourcegraph.com/.api/graphql?CurrentSiteCodyLlmConfiguration
response:
bodySize: 259
bodySize: 248
content:
encoding: base64
mimeType: application/json
size: 259
text: "[\"H4sIAAAAAAAAAwAAAP//\",\"hM5NDoIwEAXgu8yaaoMQDVu2svMCYztAA3ZIf4yG9O4G\
NhI1cfWSyZsvbwaNAaGawZtASyrWz/O5qdm2posOg2G73nsMDWsaoQLP0SnqHE79Xo0\
YNYnDrhSeraUA2bvb4OPCA1kPVVFKKTNo0Yf6DyV6NEOEj/LGOq6U4ts00rLvF6aJJk\
80CMWanLjnYjSBxBU9wdfvxs5lcUoppRcAAAD//wMAMOH90BoBAAA=\"]"
size: 248
text: "[\"H4sIAAAAAAAAA4TOTQ6CMBAF4LvMmmqDEA1btrLzAmM7QAN2SH+MhvTuBjYSNXH1ksmbL\
28GjQGhmsGbQEsq1s/zuanZtqaLDoNhu957DA1rGqECz9Ep6hxO/V6NGDWJw64Unq2l\
ANm72+DjwgNZD1VRSikzaNGH+g8lejRDhI/yxjqulOLbNNKy7xemiSZPNAjFmpy452I\
0gcQVPcHX78bOZXFKKaUXAAAA//8DADDh/dAaAQAA\"]"
textDecoded:
data:
site:
codyLLMConfiguration:
chatModel: sourcegraph/claude-3.5-sonnet
chatModelMaxTokens: 45000
completionModel: sourcegraph/deepseek-coder-v2-lite-base
completionModelMaxTokens: 2048
fastChatModel: sourcegraph/claude-3-haiku
fastChatModelMaxTokens: 7000
cookies: []
headers:
- name: date
Expand Down Expand Up @@ -469,7 +479,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -574,7 +584,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -642,18 +652,13 @@ log:
value: null
url: https://sourcegraph.com/.api/graphql?CurrentSiteCodyLlmProvider
response:
bodySize: 128
bodySize: 131
content:
encoding: base64
mimeType: application/json
size: 128
text: "[\"H4sIAAAAAAAAA6pWSkksSVSyqlYqzixJBdHJ+SmVPj6+zvl5aZnppUWJJZn5eSDxgqL8s\
syU1CIlK6Xi/NKi5NT0osSCDKXa2tpaAAAAAP//AwAfFAXARQAAAA==\"]"
textDecoded:
data:
site:
codyLLMConfiguration:
provider: sourcegraph
size: 131
text: "[\"H4sIAAAAAAAAA6pWSkksSVSyqlY=\",\"Ks4sSQXRyfkplT4+vs75eWmZ6aVFiSWZ+Xkg\
8YKi/LLMlNQiJSul4vzSouTU9KLEggyl2traWgAAAAD//wMAHxQFwEUAAAA=\"]"
cookies: []
headers:
- name: date
Expand Down Expand Up @@ -683,7 +688,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -814,7 +819,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -933,7 +938,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -1039,7 +1044,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down Expand Up @@ -1105,24 +1110,16 @@ log:
value: null
url: https://sourcegraph.com/.api/graphql?ViewerSettings
response:
bodySize: 280
bodySize: 283
content:
encoding: base64
mimeType: application/json
size: 280
text: "[\"H4sIAAAAAAAAA4zPwUoDQRAE0H/pc75gbyoGAwrikttcOkk529D2LD292cRl/l0WAsGD4\
LXqUVALnTiYuoXOghneI0Is1zX5FGOljpZEuIxw+YIF6xYck6Mm6tbG+KB45e/ro5ZD\
f7Xgy4vkQSUP61KiLnzCJpFh7sF+HD5QJ42taMDrOxv0T1T3u3s3euFjyBm/xEPOjsw\
hxerd1n+Qocxvk4ao2G3yqVgtiptpm0RlhO3s+SRRfD3cGrXWfgAAAP//AwDHP3NmNg\
EAAA==\"]"
textDecoded:
data:
viewerSettings:
final: "{\"experimentalFeatures\":{\"enableLazyBlobSyntaxHighlighting\":true,\"\
newSearchResultFiltersPanel\":true,\"newSearchResultsUI\":tru\
e,\"proactiveSearchResultsAggregations\":true,\"searchResults\
Aggregations\":true,\"showMultilineSearchConsole\":true},\"op\
enInEditor\":{}}"
size: 283
text: "[\"H4sIAAAAAAAAA4zPwUoDQRAE0H8=\",\"6XO+YG8qBgMK4pLbXDpJOdvQ9iw9vdnEZf5d\
FgLBg+C16lFQC504mLqFzoIZ3iNCLNc1+RRjpY6WRLiMcPmCBesWHJOjJurWxvigeOX\
v66OWQ3+14MuL5EElD+tSoi58wiaRYe7Bfhw+UCeNrWjA6zsb9E9U97t7N3rhY8gZv8\
RDzo7MIcXq3dZ/kKHMb5OGqNht8qlYLYqbaZtEZYTt7PkkUXw93Bq11n4AAAD//wMAx\
z9zZjYBAAA=\"]"
cookies: []
headers:
- name: date
Expand Down Expand Up @@ -1152,7 +1149,7 @@ log:
value: max-age=31536000; includeSubDomains; preload
- name: content-encoding
value: gzip
headersSize: 1468
headersSize: 1328
httpVersion: HTTP/1.1
redirectURL: ""
status: 200
Expand Down
3 changes: 2 additions & 1 deletion lib/shared/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@ export class ChatClient {

// We only want to send up the speaker and prompt text, regardless of whatever other fields
// might be on the messages objects (`file`, `displayText`, `contextFiles`, etc.).
const messagesToSend = augmentedMessages.map(({ speaker, text }) => ({
const messagesToSend = augmentedMessages.map(({ speaker, text, cache_enabled }) => ({
text,
speaker,
cache_enabled,
}))

const completionParams = {
Expand Down
3 changes: 2 additions & 1 deletion lib/shared/src/codebase-context/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ export type ContextItemWithContent = ContextItem & { content: string }
/**
* A system chat message that adds a context item to the conversation.
*/
export interface ContextMessage extends Required<Message> {
export interface ContextMessage extends Required<Omit<Message, 'cache_enabled'>> {
/**
* Context messages are always "from" the human. (In the future, this could be from "system" for
* LLMs that support that kind of message, but that `speaker` value is not currently supported
Expand All @@ -243,6 +243,7 @@ export interface ContextMessage extends Required<Message> {
* The context item that this message introduces into the conversation.
*/
file: ContextItem
cache_enabled?: boolean | null
}

export const GENERAL_HELP_LABEL = 'Search for a file to include, or type # for symbols...'
Expand Down
16 changes: 15 additions & 1 deletion lib/shared/src/sourcegraph-api/clientConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ export interface CodyNotice {
//
// This is fetched from the Sourcegraph instance and is specific to the current user.
//
// For the canonical type definition, see https://sourcegraph.com/github.com/sourcegraph/sourcegraph/-/blob/internal/clientconfig/types.go
// For the canonical type definition, see model ClientConfig in https://sourcegraph.sourcegraph.com/github.com/sourcegraph/sourcegraph/-/blob/internal/openapi/internal.tsp
// API Spec: https://sourcegraph.sourcegraph.com/api/openapi/internal#get-api-client-config
export interface CodyClientConfig {
// Whether the site admin allows this user to make use of the Cody chat feature.
chatEnabled: boolean
Expand Down Expand Up @@ -73,6 +74,9 @@ export interface CodyClientConfig {

// Whether code search is enabled for the SG instance.
codeSearchEnabled: boolean

// The latest supported completions stream API version.
latestSupportedCompletionsStreamAPIVersion?: number
}

export const dummyClientConfigForTest: CodyClientConfig = {
Expand Down Expand Up @@ -317,6 +321,7 @@ export class ClientConfigSingleton {
if (isError(clientConfig)) {
throw clientConfig
}
latestCodyClientConfig = clientConfig
return clientConfig
})
}
Expand All @@ -329,3 +334,12 @@ export class ClientConfigSingleton {
return this.fetchConfigEndpoint(signal, config)
}
}
// It's really complicated to access CodyClientConfig from functions like utils.ts
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worth getting team alignment on what the singleton story should look like for CodyClientConfig. Currently, the interface is designed in a very poor way where it includes information from the server AND custom local settings. I think a cleaner solution is to separate the /.api/client-config settings from locally inferred settings. Going forward, we should stop using the site version as a feature gating signal and exclusively use /.api/client-config as you have done with api-version=7 in this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocking comment, just want to call out that I'm not 100% happy with how the current code is organized

Copy link
Contributor Author

@julialeex julialeex Jan 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack. Can revisit this problem later.

// Most recently fetched client config, cached at module level so that free
// functions (e.g. in utils.ts) can consult it without having a reference to
// the ClientConfigSingleton instance.
export let latestCodyClientConfig: CodyClientConfig | undefined

/**
 * Whether the connected Sourcegraph instance supports prompt caching.
 *
 * Prompt caching requires completions stream API version 7 or later, as
 * advertised by `latestSupportedCompletionsStreamAPIVersion` on the fetched
 * {@link CodyClientConfig}.
 *
 * @returns true iff a client config has been fetched and it reports
 * completions stream API version >= 7; false when no config has been fetched
 * yet or the server does not report a version.
 */
export function serverSupportsPromptCaching(): boolean {
    // Read the (possibly undefined) version once so the undefined-check and
    // the comparison cannot observe two different values if
    // `latestCodyClientConfig` is reassigned concurrently.
    const version = latestCodyClientConfig?.latestSupportedCompletionsStreamAPIVersion
    return version !== undefined && version >= 7
}
16 changes: 16 additions & 0 deletions lib/shared/src/sourcegraph-api/completions/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,23 @@ export type Event = DoneEvent | CompletionEvent | ErrorEvent
/**
 * A single chat message sent to or received from the completions API.
 */
export interface Message {
// Note: the unified API only supports one system message, and it must be
// passed as the first message.
speaker: 'human' | 'assistant' | 'system'
// Historically the message body was sent as `text`. Starting with
// api-version 7, clients are required to send `content` instead, to respect
// the official API contract and mirror what the OpenAI and Anthropic APIs
// expect.
text?: PromptString
// When truthy, requests prompt caching for this message; `null` and
// `undefined` both mean caching is not requested (see the truthiness check in
// completions/utils.ts). Only honored when the server supports prompt caching.
cache_enabled?: boolean | null
}

/**
 * Token-usage accounting attached to a completion response.
 *
 * NOTE(review): the snake_case field names appear to mirror the provider wire
 * format (OpenAI-style `usage` object) — confirm against the API spec.
 */
export interface CompletionUsage {
// Tokens generated in the completion, or null when not reported.
completion_tokens: number | null
// Tokens consumed by the prompt, or null when not reported.
prompt_tokens: number | null
// Sum of prompt and completion tokens, or null when not reported.
total_tokens: number | null
// Optional breakdown of prompt tokens (e.g. cache hits); absent or null when
// the server provides no breakdown.
prompt_tokens_details?: PromptTokensDetails | null
}

/**
 * Breakdown of prompt tokens, used to report how many tokens were served from
 * the prompt cache.
 *
 * NOTE(review): `cached_tokens` looks like OpenAI's usage schema and
 * `cache_read_input_tokens` like Anthropic's — confirm which providers
 * populate which field.
 */
export interface PromptTokensDetails {
cached_tokens?: number | null
cache_read_input_tokens?: number | null
}

export interface CompletionResponse {
Expand Down
24 changes: 20 additions & 4 deletions lib/shared/src/sourcegraph-api/completions/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { type SerializedChatMessage, contextFiltersProvider } from '../..'
import { serverSupportsPromptCaching } from '../clientConfig'
import type { CompletionParameters, Message, SerializedCompletionParameters } from './types'

/**
Expand Down Expand Up @@ -26,9 +27,24 @@ async function serializePrompts(
}

return Promise.all(
messages.map(async m => ({
...m,
text: await m.text?.toFilteredString(contextFiltersProvider),
}))
messages.map(async m => {
const text = await m.text?.toFilteredString(contextFiltersProvider)
if (serverSupportsPromptCaching() && m.cache_enabled) {
return {
speaker: m.speaker,
content: [
{
type: 'text',
text: text ?? '',
cache_control: { type: 'ephemeral' },
},
],
}
}
return {
...m,
text: text,
}
})
)
}
Loading