diff --git a/lib/shared/src/chat/chat.ts b/lib/shared/src/chat/chat.ts index 48551d4c10df..a5432bd677d7 100644 --- a/lib/shared/src/chat/chat.ts +++ b/lib/shared/src/chat/chat.ts @@ -57,12 +57,13 @@ export class ChatClient { // We only want to send up the speaker and prompt text, regardless of whatever other fields // might be on the messages objects (`file`, `displayText`, `contextFiles`, etc.). - const messagesToSend = augmentedMessages.map(({ speaker, text }) => ({ + const messagesToSend = augmentedMessages.map(({ speaker, text, content }) => ({ text, speaker, + content, })) - const completionParams = { + const completionParams: CompletionParameters = { ...DEFAULT_CHAT_COMPLETION_PARAMETERS, ...params, messages: messagesToSend, @@ -107,8 +108,8 @@ export function sanitizeMessages(messages: Message[]): Message[] { // the next one const nextMessage = sanitizedMessages[index + 1] if ( - (nextMessage.speaker === 'assistant' && !nextMessage.text?.length) || - (message.speaker === 'assistant' && !message.text?.length) + (nextMessage.speaker === 'assistant' && !nextMessage.text?.length && !nextMessage.content) || + (message.speaker === 'assistant' && !message.text?.length && !message.content) ) { return false } diff --git a/lib/shared/src/chat/transcript/messages.ts b/lib/shared/src/chat/transcript/messages.ts index 144f5e1eceb1..74ffab45d537 100644 --- a/lib/shared/src/chat/transcript/messages.ts +++ b/lib/shared/src/chat/transcript/messages.ts @@ -22,6 +22,7 @@ export interface SubMessage { export interface ChatMessage extends Message { contextFiles?: ContextItem[] + base64Image?: string contextAlternatives?: RankedContext[] diff --git a/lib/shared/src/sourcegraph-api/completions/types.ts b/lib/shared/src/sourcegraph-api/completions/types.ts index d606152dbe2b..5c1211f2cacb 100644 --- a/lib/shared/src/sourcegraph-api/completions/types.ts +++ b/lib/shared/src/sourcegraph-api/completions/types.ts @@ -5,27 +5,28 @@ interface DoneEvent { type: 'done' } -interface CompletionEvent extends CompletionResponse { - type: 'completion' -} +// interface CompletionEvent extends CompletionResponse { +// type: 'completion' +// } interface ErrorEvent { type: 'error' error: string } -export type Event = DoneEvent | CompletionEvent | ErrorEvent +export type Event = DoneEvent | ErrorEvent export interface Message { // Note: The unified API only supports one system message passed as the first message speaker: 'human' | 'assistant' | 'system' text?: PromptString + content?: string | MessagePart[] + base64Image?: string } -export interface CompletionResponse { - completion: string - stopReason?: string -} +type MessagePart = + | { type: 'text'; text: string } // a normal text message + | { type: 'image_url'; image_url: { url: string } } // image message, per https://platform.openai.com/docs/guides/vision export interface CompletionParameters { fast?: boolean @@ -45,6 +46,7 @@ export interface CompletionParameters { type: 'content' content: string } + base64Image?: string } export interface SerializedCompletionParameters extends Omit { diff --git a/vscode/src/chat/chat-view/ChatBuilder.ts b/vscode/src/chat/chat-view/ChatBuilder.ts index c5dcab311e04..9a51f56821f4 100644 --- a/vscode/src/chat/chat-view/ChatBuilder.ts +++ b/vscode/src/chat/chat-view/ChatBuilder.ts @@ -157,7 +157,7 @@ export class ChatBuilder { if (this.messages.at(-1)?.speaker === 'human') { throw new Error('Cannot add a user message after a user message') } - this.messages.push({ ...message, speaker: 'human' }) + this.messages.push({ ...message, speaker: 'human', base64Image: this.getAndResetImage() }) this.changeNotifications.next() } @@ -322,6 +322,31 @@ export class ChatBuilder { } return result } + + /** + * Store the base64-encoded image uploaded by user to a multi-modal model. + * Requires vision support in the model, added in the PR + * https://github.com/sourcegraph/sourcegraph/pull/546 + */ + private image: string | undefined = undefined + + /** + * Sets the base64-encoded image for the chat model. + * @param base64Image - The base64-encoded image data to set. + */ + public setImage(base64Image: string): void { + this.image = base64Image + } + + /** + * Gets the base64-encoded image for the chat model and resets the internal image property to undefined. + * @returns The base64-encoded image, or undefined if no image has been set. + */ + public getAndResetImage(): string | undefined { + const image = this.image + this.image = undefined + return image + } } function messageToSerializedChatInteraction( diff --git a/vscode/src/chat/chat-view/ChatController.ts b/vscode/src/chat/chat-view/ChatController.ts index 3c7d560ef0cb..1aa2dbbe3f11 100644 --- a/vscode/src/chat/chat-view/ChatController.ts +++ b/vscode/src/chat/chat-view/ChatController.ts @@ -513,6 +513,11 @@ export class ChatController implements vscode.Disposable, vscode.WebviewViewProv } break } + + case 'chat/upload-file': { + this.chatBuilder.setImage(message.base64) + break + } case 'log': { const logger = message.level === 'debug' ? logDebug : logError logger(message.filterLabel, message.message) diff --git a/vscode/src/chat/chat-view/prompt.ts b/vscode/src/chat/chat-view/prompt.ts index d528559cc553..f9e18cd0e08e 100644 --- a/vscode/src/chat/chat-view/prompt.ts +++ b/vscode/src/chat/chat-view/prompt.ts @@ -89,6 +89,9 @@ export class DefaultPrompter { `Ignored ${messagesIgnored} chat messages due to context limit` ) } + for (const message of reverseTranscript) { + promptBuilder.tryAddImage(message.base64Image) + } // Counter for context items categorized by source const ignoredContext = { user: 0, corpus: 0, transcript: 0 } diff --git a/vscode/src/chat/protocol.ts b/vscode/src/chat/protocol.ts index 88c54d82f157..7c61aec93aee 100644 --- a/vscode/src/chat/protocol.ts +++ b/vscode/src/chat/protocol.ts @@ -156,6 +156,8 @@ export type WebviewMessage = selectedFilters: NLSSearchDynamicFilter[] } | { command: 'action/confirmation'; id: string; response: boolean } + | { command: 'log'; level: 'debug' | 'error'; filterLabel: string; message: string } + | { command: 'chat/upload-file'; base64: string } export interface SmartApplyResult { taskId: FixupTaskID diff --git a/vscode/src/completions/nodeClient.ts b/vscode/src/completions/nodeClient.ts index 4f2fc6500492..b4c196175658 100644 --- a/vscode/src/completions/nodeClient.ts +++ b/vscode/src/completions/nodeClient.ts @@ -9,7 +9,6 @@ import { type CompletionCallbacks, type CompletionParameters, type CompletionRequestParameters, - type CompletionResponse, NetworkError, RateLimitError, SourcegraphCompletionsClient, @@ -21,6 +20,7 @@ import { getTraceparentHeaders, globalAgentRef, isError, + logDebug, logError, onAbort, parseEvents, @@ -38,6 +38,10 @@ export class SourcegraphNodeCompletionsClient extends SourcegraphCompletionsClie signal?: AbortSignal ): Promise { const { apiVersion, interactionId } = requestParams + for (const message of params.messages) { + logDebug('apiVersion', JSON.stringify(apiVersion, null, 2)) + logDebug('base64Image', JSON.stringify(message, null, 2)) + } const url = new URL(await this.completionsEndpoint()) if (apiVersion >= 1) { @@ -326,7 +330,7 @@ export class SourcegraphNodeCompletionsClient extends SourcegraphCompletionsClie getActiveTraceAndSpanId()?.traceId ) } - const json = (await response.json()) as CompletionResponse + const json = await response.json() if (typeof json?.completion === 'string') { cb.onChange(json.completion) cb.onComplete() diff --git a/vscode/src/prompt-builder/index.ts b/vscode/src/prompt-builder/index.ts index cf55068e1fff..43ad5b7d633c 100644 --- a/vscode/src/prompt-builder/index.ts +++ b/vscode/src/prompt-builder/index.ts @@ -33,6 +33,7 @@ export class PromptBuilder { * A list of context items that are used to build context messages. */ public contextItems: ContextItem[] = [] + public images: string[] = [] /** * Convenience constructor because loading the tokenizer is async due to its large size. @@ -47,10 +48,28 @@ export class PromptBuilder { if (this.contextItems.length > 0) { this.buildContextMessages() } - + this.buildImageMessages() return this.prefixMessages.concat([...this.reverseMessages].reverse()) } + private buildImageMessages(): void { + for (const image of this.images) { + const imageMessage: Message = { + speaker: 'human', + content: [ + { + type: 'image_url', + image_url: { + // TODO: Handle PNG/JPEG, don't hardcode to JPEG + url: `data:image/jpeg;base64,${image}`, + }, + }, + ], + } + this.reverseMessages.push(...[ASSISTANT_MESSAGE, imageMessage]) + } + } + private buildContextMessages(): void { for (const item of this.contextItems) { // Create context messages for each context item, where @@ -108,6 +127,12 @@ export class PromptBuilder { return undefined } + public tryAddImage(base64Image: string | undefined): void { + if (base64Image) { + this.images.push(base64Image) + } + } + public async tryAddContext( type: ContextTokenUsageType | 'history', contextItems: ContextItem[] diff --git a/vscode/webviews/chat/cells/messageCell/human/editor/HumanMessageEditor.tsx b/vscode/webviews/chat/cells/messageCell/human/editor/HumanMessageEditor.tsx index 6bb138f536bf..e4769a35ebf9 100644 --- a/vscode/webviews/chat/cells/messageCell/human/editor/HumanMessageEditor.tsx +++ b/vscode/webviews/chat/cells/messageCell/human/editor/HumanMessageEditor.tsx @@ -31,6 +31,7 @@ import { import type { UserAccountInfo } from '../../../../../Chat' import { type ClientActionListener, useClientActionListener } from '../../../../../client/clientState' import { promptModeToIntent } from '../../../../../prompts/PromptsTab' +import { getVSCodeAPI } from '../../../../../utils/VSCodeApi' import { useTelemetryRecorder } from '../../../../../utils/telemetry' import { useFeatureFlag } from '../../../../../utils/useFeatureFlags' import { useLinkOpener } from '../../../../../utils/useLinkOpener' @@ -99,6 +100,8 @@ export const HumanMessageEditor: FunctionComponent<{ }) => { const telemetryRecorder = useTelemetryRecorder() + const [imageFile, setImageFile] = useState(undefined) + const editorRef = useRef(null) useImperativeHandle(parentEditorRef, (): PromptEditorRefAPI | null => editorRef.current, []) @@ -126,7 +129,7 @@ export const HumanMessageEditor: FunctionComponent<{ const experimentalPromptEditorEnabled = useFeatureFlag(FeatureFlag.CodyExperimentalPromptEditor) const onSubmitClick = useCallback( - (intent?: ChatMessage['intent'], forceSubmit?: boolean): void => { + async (intent?: ChatMessage['intent'], forceSubmit?: boolean): Promise => { if (!forceSubmit && submitState === 'emptyEditorValue') { return } @@ -142,6 +145,28 @@ export const HumanMessageEditor: FunctionComponent<{ const value = editorRef.current.getSerializedValue() parentOnSubmit(intent) + if (imageFile) { + const readFileGetBase64String = (file: File): Promise => { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.onload = () => { + const base64 = reader.result + if (base64 && typeof base64 === 'string') { + resolve(base64.split(',')[1]) + } else { + reject(new Error('Failed to read file')) + } + } + reader.onerror = () => reject(new Error('Failed to read file')) + reader.readAsDataURL(file) + }) + } + + const base64 = await readFileGetBase64String(imageFile) + getVSCodeAPI().postMessage({ command: 'chat/upload-file', base64 }) + setImageFile(undefined) + } + parentOnSubmit(intent) telemetryRecorder.recordEvent('cody.humanMessageEditor', 'submit', { metadata: { @@ -157,7 +182,15 @@ export const HumanMessageEditor: FunctionComponent<{ }, }) }, - [submitState, parentOnSubmit, onStop, telemetryRecorder.recordEvent, isFirstMessage, isSent] + [ + submitState, + parentOnSubmit, + onStop, + telemetryRecorder.recordEvent, + isFirstMessage, + isSent, + imageFile, + ] ) const onEditorEnterKey = useCallback( @@ -423,6 +456,7 @@ export const HumanMessageEditor: FunctionComponent<{ ) const Editor = experimentalPromptEditorEnabled ? PromptEditorV2 : PromptEditor + const experimentalOneBoxEnabled = useFeatureFlag(FeatureFlag.CodyExperimentalOneBoxDebug) return ( // biome-ignore lint/a11y/useKeyWithClickEvents: only relevant to click areas @@ -470,6 +504,9 @@ export const HumanMessageEditor: FunctionComponent<{ hidden={!focused && isSent} className={styles.toolbar} intent={intent} + imageFile={imageFile} + setImageFile={setImageFile} + experimentalOneBoxEnabled={experimentalOneBoxEnabled} /> )} diff --git a/vscode/webviews/chat/cells/messageCell/human/editor/toolbar/Toolbar.tsx b/vscode/webviews/chat/cells/messageCell/human/editor/toolbar/Toolbar.tsx index 6735273fb14b..e03d5839e94d 100644 --- a/vscode/webviews/chat/cells/messageCell/human/editor/toolbar/Toolbar.tsx +++ b/vscode/webviews/chat/cells/messageCell/human/editor/toolbar/Toolbar.tsx @@ -10,6 +10,7 @@ import { useActionSelect } from '../../../../../../prompts/PromptsTab' import { useClientConfig } from '../../../../../../utils/useClientConfig' import { AddContextButton } from './AddContextButton' import { SubmitButton, type SubmitButtonState } from './SubmitButton' +import { UploadImageButton } from './UploadImageButton' /** * The toolbar for the human message editor. @@ -35,6 +36,10 @@ export const Toolbar: FunctionComponent<{ intent?: ChatMessage['intent'] manuallySelectIntent: (intent: ChatMessage['intent']) => void + experimentalOneBoxEnabled?: boolean + + imageFile?: File + setImageFile: (file: File | undefined) => void }> = ({ userInfo, isEditorFocused, @@ -48,6 +53,9 @@ export const Toolbar: FunctionComponent<{ models, intent, manuallySelectIntent, + experimentalOneBoxEnabled, + imageFile, + setImageFile, }) => { /** * If the user clicks in a gap or on the toolbar outside of any of its buttons, report back to @@ -88,6 +96,14 @@ export const Toolbar: FunctionComponent<{ /> )} + { + + } + void +} + +export const UploadImageButton = (props: UploadImageButtonProps) => { + const fileInputRef = useRef(null) + + const handleButtonClick = () => { + fileInputRef.current?.click() + } + + const handleFileChange = async (event: React.ChangeEvent) => { + const file = event.target.files?.[0] + props.onClick(file) + } + + return ( + + + + + + {props.imageFile ? 'Remove attached image' : 'Upload an image'} + + + + ) +} diff --git a/vscode/webviews/components/modelSelectField/ModelSelectField.module.css b/vscode/webviews/components/modelSelectField/ModelSelectField.module.css index 6ef4a7f15e62..14350c68bdf4 100644 --- a/vscode/webviews/components/modelSelectField/ModelSelectField.module.css +++ b/vscode/webviews/components/modelSelectField/ModelSelectField.module.css @@ -36,6 +36,10 @@ margin-left: auto; } +.supports-image-upload-icon { + margin-left: auto; +} + .badge { margin-left: auto; line-height: 16px; @@ -45,12 +49,17 @@ border: 1px solid var(--vscode-contrastBorder); } +.supports-image-upload-icon + .badge { + margin-left: 0; +} + button > .model-title-with-icon .model-name { font-weight: normal; } button > .model-title-with-icon .model-icon, button > .model-title-with-icon .model-provider, -button > .model-title-with-icon .badge { +button > .model-title-with-icon .badge, +.supports-image-upload-icon { display: none; }