diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index b7978f2..54d7044 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,5 +1,28 @@ -Copilot instructions +## Copilot Usage Notes -Look carefully through [AGENTS.md](../AGENTS.md) for a description of the project and how to contribute. +Always skim [AGENTS.md](../AGENTS.md) before making changes—the document is the single source of truth for architecture, performance targets, and workflow expectations. -Follow instructions carefully. \ No newline at end of file +### Hot-path rules + +- Reuse the helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of hand-written JSON responses. +- Preserve the SSE contract in `src/http/routes/chat.ts`: emit the role chunk first, follow with `data: { ... }` payloads, and terminate with `data: [DONE]`. +- When streaming, keep `socket.setNoDelay(true)` on the response socket to avoid latency regressions. +- Honor the `state.activeRequests` concurrency guard and return early 429s via `writeRateLimit`. + +### Tool calling compatibility + +- `mergeTools` already merges deprecated `functions`; prefer extending it over new code paths. +- The bridge treats `tool_choice: "required"` like `"auto"` and ignores `parallel_tool_calls`—reflect this limitation in docs if behavior changes. +- Stream tool call deltas using `delta.tool_calls` chunks containing JSON-encoded argument strings. Downstream clients should replace, not append, argument fragments. + +### Scope & contracts + +- Public endpoints are `/health`, `/v1/models`, `/v1/chat/completions`. Changing contracts requires README updates and a version bump. +- Keep the bridge loopback-only unless a new configuration knob is explicitly approved. +- Update configuration docs when introducing new `bridge.*` settings and run `npm run compile` before handing off changes. + +### Workflow + +- Plan with the todo-list tool, keep diffs minimal, and avoid formatting unrelated regions. +- Capture limitations or behavior differences (e.g., missing OpenAI response fields) in comments or docs so clients aren’t surprised. +- Summarize the outcome after each change: what was touched, how it was verified, and any follow-ups. \ No newline at end of file diff --git a/.github/instructions/ts.instructions.md b/.github/instructions/ts.instructions.md index e3e56a1..1f6e478 100644 --- a/.github/instructions/ts.instructions.md +++ b/.github/instructions/ts.instructions.md @@ -13,6 +13,7 @@ applyTo: '**/*.ts' - Prefer readable, explicit solutions over clever shortcuts. - Extend current abstractions before inventing new ones. - Prioritize maintainability and clarity, short methods and classes, clean code. +- Keep edits aligned with [AGENTS.md](../../AGENTS.md) and `.github/copilot-instructions.md`. ## Programming Language: TypeScript @@ -40,6 +41,11 @@ applyTo: '**/*.ts' - Use pure ES modules; never emit `require`, `module.exports`, or CommonJS helpers. - Rely on the project's build, lint, and test scripts unless asked otherwise. - Note design trade-offs when intent is not obvious. +- Reuse the HTTP helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of writing ad-hoc JSON responses. +- Preserve the SSE contract in `src/http/routes/chat.ts`: send the role chunk first, follow with `data: { ... }` payloads, and always terminate with `data: [DONE]`.
+- When streaming, call `res.socket?.setNoDelay(true)` before emitting chunks to avoid latency regressions. +- Honor the concurrency guard (`state.activeRequests`) and return early 429 responses via `writeRateLimit` when limits are exceeded. +- Communicate limitations of the VS Code LM API, e.g., `tool_choice: "required"` behaving like `"auto"` and the lack of `parallel_tool_calls` support. ## Project Organization @@ -75,6 +81,7 @@ applyTo: '**/*.ts' - Send errors through the project's logging/telemetry utilities. - Surface user-facing errors via the repository's notification pattern. - Debounce configuration-driven updates and dispose resources deterministically. +- Prefer the pre-serialized error helpers for fast paths and document any new reason codes in the README and status handlers. ## Architecture & Patterns @@ -126,6 +133,8 @@ applyTo: '**/*.ts' - Defer expensive work until users need it. - Batch or debounce high-frequency events to reduce thrash. - Track resource lifetimes to prevent leaks. +- Avoid repeated configuration reads in hot paths; cache settings when practical. +- Maintain streaming code paths without buffering entire responses; only accumulate when `stream: false`. ## Documentation & Comments diff --git a/AGENTS.md b/AGENTS.md index 89ec7c6..3225c37 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,12 +37,12 @@ The server is **local only** (loopback host by default) and is not meant for mul 2. **Imports**: All imports at file top. No inline `import('module')` types. 3. **ES Module Style**: Use `import` syntax (even though `commonjs` output). No `require` in source except in isolated legacy shims (currently none). 4. **Polka Typings**: The custom declaration in `src/types/polka.d.ts` must stay minimal but strongly typed. Extend only when you need new surface. -5. **Error Handling**: Use central `onError` (`server.ts`). Avoid swallowing errors; bubble or log via `verbose`. +5. **Error Handling**: Use central `onError` (`server.ts`). Avoid swallowing errors; bubble or log via `verbose`. Prefer the pre-serialized helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of hand-crafted JSON bodies. 6. **Logging**: Use `verbose()` for debug (guarded by config), `info()` for one‑time start messages, `error()` sparingly (currently not widely used—add only if user‑facing severity). 7. **Status Bar**: Use `updateStatus(kind)` with kinds: `start | error | success`. Initial pending state relies on `state.modelAttempted`. -8. **Model Selection**: Always feature‑detect the LM API (`hasLMApi`). Return early on missing API with clear `state.lastReason` codes. +8. **Model Selection**: Always feature-detect the LM API (`hasLMApi`). Return early on missing API with clear `state.lastReason` codes. 9. **Endpoint Stability**: Public paths (`/health`, `/v1/models`, `/v1/chat/completions`). Changes require README updates and semantic version bump. -10. **Streaming**: SSE contract: multiple `data: {chunk}` events + final `data: [DONE]`. Preserve this shape. +10. **Streaming & Tool Calling**: SSE contract: multiple `data: {chunk}` events + final `data: [DONE]`. Preserve this shape. Tool call chunks must emit `delta.tool_calls` entries encoded as JSON; arguments may arrive as incremental strings, so downstream clients should replace rather than append argument fragments.
The bridge treats `tool_choice: "required"` the same as `"auto"` and ignores `parallel_tool_calls` because the VS Code LM API lacks those controls—communicate this limitation in the README and responses if behaviour changes in the future. --- @@ -111,6 +111,7 @@ Avoid high‑volume logs in hot loops. Guard truly verbose details behind featur - Concurrency limit enforced in `/v1/chat/completions` before model call; maintain early 429 path. - Streaming is async iteration; avoid buffering entire response unless `stream: false`. +- Disable Nagle’s algorithm on streaming sockets with `socket.setNoDelay(true)` before writing SSE payloads. - Do not introduce global locks; keep per‑request ephemeral state. --- diff --git a/README.md b/README.md index 9396c79..f129c83 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ # Copilot Bridge (VS Code Extension) -[![Visual Studio Marketplace](https://vsmarketplacebadges.dev/version/thinkability.copilot-bridge.svg)](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge) +[![Visual Studio Marketplace Version](https://img.shields.io/visual-studio-marketplace/v/thinkability.copilot-bridge)](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge) +[![Visual Studio Marketplace Installs](https://img.shields.io/visual-studio-marketplace/d/thinkability.copilot-bridge?label=installs)](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge) Expose GitHub Copilot as a local, OpenAI-compatible HTTP endpoint running inside VS Code. The bridge forwards chat requests to Copilot using the VS Code Language Model API and streams results back to you. @@ -23,6 +24,7 @@ The extension will autostart and requires VS Code to be running. ## Changelog +- **v1.1.0** — Simplified architecture with a focus on performance improvements; Copilot Bridge is now 20-30% faster at raw inference. - **v1.0.0** — Modular architecture refactor with service layer, OpenAI type definitions, and tool calling support - **v0.2.2** — Polka HTTP server integration and model family selection improvements - **v0.1.5** — Server lifecycle fixes and improved error handling diff --git a/package.json b/package.json index c081009..99d25b8 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "name": "copilot-bridge", "displayName": "Copilot Bridge", "description": "Local OpenAI-compatible chat endpoint (inference) bridging to GitHub Copilot via the VS Code Language Model API.", - "version": "1.0.0", + "version": "1.1.0", "publisher": "thinkability", "repository": { "type": "git", @@ -22,10 +22,7 @@ "Other" ], "activationEvents": [ - "onStartupFinished", - "onCommand:bridge.enable", - "onCommand:bridge.disable", - "onCommand:bridge.status" + "onStartupFinished" ], "main": "./out/extension.js", "contributes": { diff --git a/src/http/auth.ts b/src/http/auth.ts index cb45dfa..9a14786 100644 --- a/src/http/auth.ts +++ b/src/http/auth.ts @@ -1,4 +1,21 @@ import type { IncomingMessage } from 'http'; -export const isAuthorized = (req: IncomingMessage, token: string): boolean => - !token || req.headers.authorization === `Bearer ${token}`; +// Cache the authorization header to avoid repeated concatenation +let cachedToken = ''; +let cachedAuthHeader = ''; + +/** + * Checks if the request is authorized against the configured token. + * Caches the full "Bearer <token>" header to optimize the hot path.
+ */ +export const isAuthorized = (req: IncomingMessage, token: string): boolean => { + if (!token) return true; + + // Update cache if token changed + if (token !== cachedToken) { + cachedToken = token; + cachedAuthHeader = `Bearer ${token}`; + } + + return req.headers.authorization === cachedAuthHeader; +}; diff --git a/src/http/routes/chat.ts b/src/http/routes/chat.ts index a450482..cb479f4 100644 --- a/src/http/routes/chat.ts +++ b/src/http/routes/chat.ts @@ -1,16 +1,29 @@ import * as vscode from 'vscode'; import type { IncomingMessage, ServerResponse } from 'http'; import { state } from '../../state'; -import { isChatCompletionRequest, type ChatCompletionRequest } from '../../messages'; -import { readJson, writeErrorResponse } from '../utils'; +import { + isChatCompletionRequest, + type ChatCompletionRequest, + normalizeMessagesLM, + convertOpenAIToolsToLM, + convertFunctionsToTools, + type Tool, +} from '../../messages'; +import { readJson, writeErrorResponse, writeJson } from '../utils'; import { verbose } from '../../log'; -import { ModelService } from '../../services/model-service'; -import { StreamingResponseHandler } from '../../services/streaming-handler'; -import { processLanguageModelResponse, sendCompletionResponse } from '../../services/response-formatter'; -import type { ChatCompletionContext } from '../../types/openai-types'; +import { getModel, hasLMApi } from '../../models'; +import { getBridgeConfig } from '../../config'; +import type { + ChatCompletionContext, + ProcessedResponseData, + OpenAIResponse, + OpenAIMessage, + OpenAIToolCall, + OpenAIChoice, +} from '../../types/openai-types'; /** - * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling + * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling. * @param req - HTTP request object * @param res - HTTP response object */ @@ -21,46 +34,49 @@ export async function handleChatCompletion(req: IncomingMessage, res: ServerResp try { const body = await readJson(req); if (!isChatCompletionRequest(body)) { - return writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload'); + writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload'); + return; } - const modelService = new ModelService(); - - // Validate model availability - const modelValidation = await modelService.validateModel(body.model); - if (!modelValidation.isValid) { - const errorMessage = body.model ? 'model not found' : 'Copilot unavailable'; - return writeErrorResponse( - res, - modelValidation.statusCode!, - errorMessage, - modelValidation.errorType!, - modelValidation.errorCode!, - modelValidation.reason || 'unknown_error' - ); + const model = await resolveModel(body.model, res); + if (!model) { + return; } - // Create processing context - const context = await modelService.createProcessingContext(body); - const chatContext = modelService.createChatCompletionContext(body, context.lmTools.length > 0); - - verbose(`LM request via API model=${context.model.family || context.model.id || context.model.name || 'unknown'} tools=${context.lmTools.length}`); + const config = getBridgeConfig(); + const mergedTools = mergeTools(body); + const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow); + const lmTools = convertOpenAIToolsToLM(mergedTools); + const requestOptions: vscode.LanguageModelChatRequestOptions = lmTools.length > 0 + ? 
{ tools: lmTools } + : {}; + + const modelName = selectResponseModelName(model, body.model); + const chatContext = createChatCompletionContext(body, mergedTools.length > 0, modelName); + verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'} tools=${lmTools.length}`); - // Execute the Language Model request const cancellationToken = new vscode.CancellationTokenSource(); - const response = await context.model.sendRequest( - context.lmMessages, - context.requestOptions, - cancellationToken.token - ); - // Handle response based on streaming preference - if (chatContext.isStreaming) { - await handleStreamingResponse(res, response, chatContext, body); - } else { - await handleNonStreamingResponse(res, response, chatContext, body); + try { + const response = await model.sendRequest( + lmMessages as vscode.LanguageModelChatMessage[], + requestOptions, + cancellationToken.token + ); + + try { + if (chatContext.isStreaming) { + await streamResponse(res, response, chatContext); + } else { + const processed = await collectResponseData(response); + sendCompletionResponse(res, chatContext, processed, body); + } + } finally { + disposeResponse(response); + } + } finally { + cancellationToken.dispose(); } - } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); writeErrorResponse(res, 500, errorMessage || 'internal_error', 'server_error', 'internal_error'); @@ -70,29 +86,295 @@ export async function handleChatCompletion(req: IncomingMessage, res: ServerResp } } +const SSE_HEADERS = { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', +} as const; + /** - * Handles streaming response using Server-Sent Events + * Merges tools and deprecated functions, respecting tool_choice configuration. + * @param body - Chat completion request + * @returns Filtered array of tools to use */ -async function handleStreamingResponse( - res: ServerResponse, - response: vscode.LanguageModelChatResponse, - chatContext: ChatCompletionContext, - requestBody: ChatCompletionRequest -): Promise { - const streamHandler = new StreamingResponseHandler(res, chatContext, requestBody); - streamHandler.initializeStream(); - await streamHandler.processAndStreamResponse(response); +function mergeTools(body: ChatCompletionRequest): Tool[] { + // Early exit for disabled tools + if (body.tool_choice === 'none' || body.function_call === 'none') { + return []; + } + + const baseTools = body.tools ?? []; + const functionTools = convertFunctionsToTools(body.functions); + const combined = functionTools.length > 0 ? 
[...baseTools, ...functionTools] : baseTools; + + // Handle specific tool selection + if ( + body.tool_choice && + typeof body.tool_choice === 'object' && + 'type' in body.tool_choice && + body.tool_choice.type === 'function' && + 'function' in body.tool_choice && + body.tool_choice.function && + typeof body.tool_choice.function === 'object' && + 'name' in body.tool_choice.function + ) { + const fnName = body.tool_choice.function.name; + if (typeof fnName === 'string') { + return combined.filter((tool) => tool.function.name === fnName); + } + } + + return combined; +} + +async function resolveModel( + requestedModel: string | undefined, + res: ServerResponse +): Promise { + const model = await getModel(false, requestedModel); + if (model) { + return model; + } + + const hasLanguageModels = hasLMApi(); + if (requestedModel && hasLanguageModels) { + writeErrorResponse(res, 404, 'model not found', 'invalid_request_error', 'model_not_found', 'not_found'); + } else { + const reason = hasLanguageModels ? 'copilot_model_unavailable' : 'missing_language_model_api'; + writeErrorResponse(res, 503, 'Copilot unavailable', 'server_error', 'copilot_unavailable', reason); + } + return undefined; +} + +function createChatCompletionContext( + body: ChatCompletionRequest, + hasTools: boolean, + modelName: string +): ChatCompletionContext { + return { + requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`, + modelName, + created: Math.floor(Date.now() / 1000), + hasTools, + isStreaming: body.stream === true, + }; } /** - * Handles non-streaming response with complete data + * Streams chat completion response using Server-Sent Events. + * @param res - HTTP response object + * @param response - VS Code Language Model response + * @param context - Chat completion context */ -async function handleNonStreamingResponse( +async function streamResponse( res: ServerResponse, response: vscode.LanguageModelChatResponse, - chatContext: ChatCompletionContext, - requestBody: ChatCompletionRequest + context: ChatCompletionContext ): Promise { - const processedData = await processLanguageModelResponse(response); - sendCompletionResponse(res, chatContext, processedData, requestBody); + // Disable Nagle's algorithm for lower latency streaming + if (res.socket) { + res.socket.setNoDelay(true); + } + + res.writeHead(200, SSE_HEADERS); + if (typeof res.flushHeaders === 'function') { + res.flushHeaders(); + } + verbose(`SSE start id=${context.requestId}`); + + let sawToolCall = false; + let sentRoleChunk = false; + + for await (const part of response.stream) { + // Send initial role chunk once + if (!sentRoleChunk) { + writeSseData(res, createChunkResponse(context, { role: 'assistant' }, null)); + sentRoleChunk = true; + } + + if (isToolCallPart(part)) { + sawToolCall = true; + writeSseData(res, createChunkResponse(context, { + tool_calls: [createToolCall(part)], + }, null)); + } else { + const content = extractTextContent(part); + if (content) { + writeSseData(res, createChunkResponse(context, { content }, null)); + } + } + } + + // Ensure role chunk is sent even for empty responses + if (!sentRoleChunk) { + writeSseData(res, createChunkResponse(context, { role: 'assistant' }, null)); + } + + const finalChunk = createChunkResponse(context, {}, sawToolCall ? 'tool_calls' : 'stop'); + writeSseData(res, finalChunk); + res.write('data: [DONE]\n\n'); + res.end(); + verbose(`SSE end id=${context.requestId}`); +} + +/** + * Collects complete response data from VS Code Language Model stream. 
+ * @param response - VS Code Language Model response + * @returns Processed response data with content and tool calls + */ +async function collectResponseData( + response: vscode.LanguageModelChatResponse +): Promise { + let content = ''; + const toolCalls: OpenAIToolCall[] = []; + + for await (const part of response.stream) { + if (isToolCallPart(part)) { + toolCalls.push(createToolCall(part)); + } else { + content += extractTextContent(part); + } + } + + const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop'; + return { content, toolCalls, finishReason }; +} + +function sendCompletionResponse( + res: ServerResponse, + context: ChatCompletionContext, + data: ProcessedResponseData, + requestBody?: ChatCompletionRequest +): void { + const message = createOpenAIMessage(data, requestBody); + const response: OpenAIResponse = { + id: context.requestId, + object: 'chat.completion', + created: context.created, + model: context.modelName, + choices: [ + { + index: 0, + message, + finish_reason: data.finishReason, + }, + ], + usage: { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + }, + }; + + verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`); + writeJson(res, 200, response); +} + +function createOpenAIMessage( + data: ProcessedResponseData, + requestBody?: ChatCompletionRequest +): OpenAIMessage { + const base: OpenAIMessage = { + role: 'assistant', + content: data.toolCalls.length > 0 ? null : data.content, + }; + + if (data.toolCalls.length === 0) { + return base; + } + + const withTools: OpenAIMessage = { + ...base, + tool_calls: data.toolCalls, + }; + + if (data.toolCalls.length === 1 && requestBody?.function_call !== undefined) { + return { + ...withTools, + function_call: { + name: data.toolCalls[0].function.name, + arguments: data.toolCalls[0].function.arguments, + }, + }; + } + + return withTools; +} + +function createChunkResponse( + context: ChatCompletionContext, + delta: Partial, + finishReason: OpenAIChoice['finish_reason'] | null +): OpenAIResponse { + return { + id: context.requestId, + object: 'chat.completion.chunk', + created: context.created, + model: context.modelName, + choices: [ + { + index: 0, + delta, + finish_reason: finishReason, + }, + ], + }; +} + +function writeSseData(res: ServerResponse, data: OpenAIResponse): void { + res.write(`data: ${JSON.stringify(data)}\n\n`); +} + +function createToolCall(part: vscode.LanguageModelToolCallPart): OpenAIToolCall { + return { + id: part.callId, + type: 'function', + function: { + name: part.name, + arguments: JSON.stringify(part.input), + }, + }; +} + +function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart { + return ( + part !== null && + typeof part === 'object' && + 'callId' in part && + 'name' in part && + 'input' in part + ); +} + +function extractTextContent(part: unknown): string { + if (typeof part === 'string') { + return part; + } + + if (part !== null && typeof part === 'object' && 'value' in part) { + return String((part as { value: unknown }).value) || ''; + } + + return ''; +} + +function disposeResponse(response: vscode.LanguageModelChatResponse): void { + const disposable = response as { dispose?: () => void }; + if (typeof disposable.dispose === 'function') { + disposable.dispose(); + } +} + +/** + * Selects the most appropriate model name for the response. + * Prioritizes requested model, then model ID, family, name, and finally defaults to 'copilot'. 
+ * @param model - VS Code Language Model instance + * @param requestedModel - Model name from the request + * @returns Model name to use in response + */ +function selectResponseModelName( + model: vscode.LanguageModelChat, + requestedModel: string | undefined +): string { + return requestedModel ?? model.id ?? model.family ?? model.name ?? 'copilot'; } diff --git a/src/http/server.ts b/src/http/server.ts index ffda4da..187826b 100644 --- a/src/http/server.ts +++ b/src/http/server.ts @@ -6,7 +6,7 @@ import { isAuthorized } from './auth'; import { handleHealthCheck } from './routes/health'; import { handleModelsRequest } from './routes/models'; import { handleChatCompletion } from './routes/chat'; -import { writeErrorResponse } from './utils'; +import { writeErrorResponse, writeNotFound, writeRateLimit, writeUnauthorized } from './utils'; import { ensureOutput, verbose } from '../log'; import { updateStatus } from '../status'; @@ -26,20 +26,32 @@ export const startServer = async (): Promise => { } }, onNoMatch: (_req, res) => { - writeErrorResponse(res, 404, 'not found', 'invalid_request_error', 'route_not_found'); + writeNotFound(res); }, }); - // Logging + auth middleware - app.use((req: IncomingMessage & { method?: string; url?: string }, res: ServerResponse, next: () => void) => { - verbose(`HTTP ${req.method} ${req.url}`); + // Auth middleware - runs before all routes (except /health) + app.use((req, res, next) => { + const path = req.url ?? '/'; + if (path === '/health') { + return next(); + } if (!isAuthorized(req, config.token)) { - writeErrorResponse(res, 401, 'unauthorized', 'invalid_request_error', 'unauthorized'); + writeUnauthorized(res); return; } next(); }); + // Verbose logging middleware + const cfg = getBridgeConfig(); + if (cfg.verbose) { + app.use((req, res, next) => { + verbose(`${req.method} ${req.url}`); + next(); + }); + } + app.get('/health', async (_req: IncomingMessage, res: ServerResponse) => { await handleHealthCheck(res, config.verbose); }); @@ -49,18 +61,15 @@ export const startServer = async (): Promise => { }); app.post('/v1/chat/completions', async (req: IncomingMessage, res: ServerResponse) => { + // Rate limiting check if (state.activeRequests >= config.maxConcurrent) { - res.writeHead(429, { 'Content-Type': 'application/json', 'Retry-After': '1' }); - res.end(JSON.stringify({ - error: { - message: 'too many requests', - type: 'rate_limit_error', - code: 'rate_limit_exceeded', - }, - })); - verbose(`429 throttled (active=${state.activeRequests}, max=${config.maxConcurrent})`); + if (config.verbose) { + verbose(`429 throttled (active=${state.activeRequests}, max=${config.maxConcurrent})`); + } + writeRateLimit(res); return; } + try { await handleChatCompletion(req, res); } catch (e) { diff --git a/src/http/utils.ts b/src/http/utils.ts index 4c81c63..437e59d 100644 --- a/src/http/utils.ts +++ b/src/http/utils.ts @@ -9,8 +9,64 @@ export interface ErrorResponse { }; } +// Pre-serialized common error responses for hot paths +const UNAUTHORIZED_ERROR = JSON.stringify({ + error: { + message: 'unauthorized', + type: 'invalid_request_error', + code: 'unauthorized', + }, +}); + +const NOT_FOUND_ERROR = JSON.stringify({ + error: { + message: 'not found', + type: 'invalid_request_error', + code: 'route_not_found', + }, +}); + +const RATE_LIMIT_ERROR = JSON.stringify({ + error: { + message: 'too many requests', + type: 'rate_limit_error', + code: 'rate_limit_exceeded', + }, +}); + +// Reusable header objects +const JSON_HEADERS = { 'Content-Type': 'application/json' 
} as const; +const RATE_LIMIT_HEADERS = { + 'Content-Type': 'application/json', + 'Retry-After': '1', +} as const; + +/** + * Fast-path unauthorized response (pre-serialized). + */ +export const writeUnauthorized = (res: ServerResponse): void => { + res.writeHead(401, JSON_HEADERS); + res.end(UNAUTHORIZED_ERROR); +}; + +/** + * Fast-path not found response (pre-serialized). + */ +export const writeNotFound = (res: ServerResponse): void => { + res.writeHead(404, JSON_HEADERS); + res.end(NOT_FOUND_ERROR); +}; + +/** + * Fast-path rate limit response (pre-serialized). + */ +export const writeRateLimit = (res: ServerResponse): void => { + res.writeHead(429, RATE_LIMIT_HEADERS); + res.end(RATE_LIMIT_ERROR); +}; + export const writeJson = (res: ServerResponse, status: number, body: T): void => { - res.writeHead(status, { 'Content-Type': 'application/json' }); + res.writeHead(status, JSON_HEADERS); res.end(JSON.stringify(body)); }; diff --git a/src/messages.ts b/src/messages.ts index 372af44..1173d93 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -152,8 +152,9 @@ export const normalizeMessagesLM = ( ).slice(-histWindow * 3); // Increased window to account for tool messages const lmMsg = (vscode as unknown as { LanguageModelChatMessage?: typeof vscode.LanguageModelChatMessage }).LanguageModelChatMessage; - const UserFactory = lmMsg?.User; - const AssistantFactory = lmMsg?.Assistant; + const userFactory = lmMsg?.User; + const assistantFactory = lmMsg?.Assistant; + const hasFactories = Boolean(userFactory && assistantFactory); const result: (vscode.LanguageModelChatMessage | { role: 'user' | 'assistant'; content: string })[] = []; let firstUserSeen = false; @@ -165,7 +166,7 @@ export const normalizeMessagesLM = ( text = `[SYSTEM]\n${toText(systemMessage.content)}\n\n[DIALOG]\nuser: ${text}`; firstUserSeen = true; } - result.push(UserFactory ? UserFactory(text) : { role: 'user', content: text }); + result.push(userFactory ? userFactory(text) : { role: 'user', content: text }); } else if (m.role === 'assistant') { // For assistant messages, we need to handle both content and tool calls let text = ''; @@ -192,20 +193,20 @@ export const normalizeMessagesLM = ( text = `[FUNCTION_CALL] ${m.function_call.name}(${m.function_call.arguments})`; } - result.push(AssistantFactory ? AssistantFactory(text) : { role: 'assistant', content: text }); + result.push(assistantFactory ? assistantFactory(text) : { role: 'assistant', content: text }); } else if (m.role === 'tool') { // Tool messages should be converted to user messages with tool result context const toolResult = `[TOOL_RESULT:${m.tool_call_id}] ${toText(m.content)}`; - result.push(UserFactory ? UserFactory(toolResult) : { role: 'user', content: toolResult }); + result.push(userFactory ? userFactory(toolResult) : { role: 'user', content: toolResult }); } } if (!firstUserSeen && systemMessage) { const text = `[SYSTEM]\n${toText(systemMessage.content)}`; - result.unshift(UserFactory ? UserFactory(text) : { role: 'user', content: text }); + result.unshift(userFactory ? userFactory(text) : { role: 'user', content: text }); } - if (result.length === 0) result.push(UserFactory ? UserFactory('') : { role: 'user', content: '' }); + if (result.length === 0) result.push(userFactory ? 
userFactory('') : { role: 'user', content: '' }); return result; }; diff --git a/src/services/model-service.ts b/src/services/model-service.ts deleted file mode 100644 index 562f899..0000000 --- a/src/services/model-service.ts +++ /dev/null @@ -1,99 +0,0 @@ -import type * as vscode from 'vscode'; -import type { ChatCompletionRequest } from '../messages'; -import type { - ModelValidationResult, - RequestProcessingContext, - ChatCompletionContext -} from '../types/openai-types'; -import { - extractAndMergeTools, - createLanguageModelRequestOptions -} from './request-processor'; -import { getModel, hasLMApi } from '../models'; -import { normalizeMessagesLM, convertOpenAIToolsToLM } from '../messages'; -import { getBridgeConfig } from '../config'; - -/** - * Service for validating models and creating request processing context - */ -export class ModelService { - - /** - * Validates the requested model and returns appropriate error details if invalid - * @param requestedModel - The model identifier from the request - * @returns Validation result with error details if model is unavailable - */ - public async validateModel(requestedModel?: string): Promise { - const model = await getModel(false, requestedModel); - - if (!model) { - const hasLM = hasLMApi(); - - if (requestedModel && hasLM) { - return { - isValid: false, - statusCode: 404, - errorType: 'invalid_request_error', - errorCode: 'model_not_found', - reason: 'not_found' - }; - } - - const reason = !hasLM ? 'missing_language_model_api' : 'copilot_model_unavailable'; - return { - isValid: false, - statusCode: 503, - errorType: 'server_error', - errorCode: 'copilot_unavailable', - reason - }; - } - - return { isValid: true }; - } - - /** - * Creates a complete request processing context from validated inputs - * @param body - The validated chat completion request - * @returns Processing context with all required elements for the Language Model API - */ - public async createProcessingContext(body: ChatCompletionRequest): Promise { - const model = await getModel(false, body.model); - if (!model) { - throw new Error('Model validation should be performed before creating processing context'); - } - - const config = getBridgeConfig(); - const mergedTools = extractAndMergeTools(body); - const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow); - const lmTools = convertOpenAIToolsToLM(mergedTools); - const requestOptions = createLanguageModelRequestOptions(lmTools); - - return { - model, - lmMessages: lmMessages as vscode.LanguageModelChatMessage[], - lmTools, - requestOptions, - mergedTools - }; - } - - /** - * Creates chat completion context for response formatting - * @param body - The chat completion request - * @param hasTools - Whether tools are present in the request - * @returns Context object for response handling - */ - public createChatCompletionContext( - body: ChatCompletionRequest, - hasTools: boolean - ): ChatCompletionContext { - return { - requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`, - modelName: body.model || 'copilot', - created: Math.floor(Date.now() / 1000), - hasTools, - isStreaming: body.stream !== false - }; - } -} \ No newline at end of file diff --git a/src/services/request-processor.ts b/src/services/request-processor.ts deleted file mode 100644 index ccdcf82..0000000 --- a/src/services/request-processor.ts +++ /dev/null @@ -1,39 +0,0 @@ -import type { ChatCompletionRequest, Tool } from '../messages'; -import type * as vscode from 'vscode'; - -/** - * Validates and extracts tool 
configurations from request body - * @param body - The parsed request body - * @returns Combined tools array including converted deprecated functions - */ -export function extractAndMergeTools(body: ChatCompletionRequest): Tool[] { - const tools = body.tools || []; - - if (body.functions) { - // Convert deprecated functions to tools format - const convertedTools: Tool[] = body.functions.map(func => ({ - type: 'function' as const, - function: func - })); - return [...tools, ...convertedTools]; - } - - return tools; -} - -/** - * Creates VS Code Language Model request options from processed context - * @param lmTools - Array of Language Model compatible tools - * @returns Request options object for the Language Model API - */ -export function createLanguageModelRequestOptions( - lmTools: vscode.LanguageModelChatTool[] -): vscode.LanguageModelChatRequestOptions { - const options: vscode.LanguageModelChatRequestOptions = {}; - - if (lmTools.length > 0) { - options.tools = lmTools; - } - - return options; -} \ No newline at end of file diff --git a/src/services/response-formatter.ts b/src/services/response-formatter.ts deleted file mode 100644 index dce7f4a..0000000 --- a/src/services/response-formatter.ts +++ /dev/null @@ -1,158 +0,0 @@ -import type * as vscode from 'vscode'; -import type { ServerResponse } from 'http'; -import type { - OpenAIResponse, - OpenAIChoice, - OpenAIMessage, - OpenAIToolCall, - ChatCompletionContext, - ProcessedResponseData -} from '../types/openai-types'; -import type { ChatCompletionRequest } from '../messages'; -import { writeJson } from '../http/utils'; -import { verbose } from '../log'; - -/** - * Processes VS Code Language Model stream parts into structured data - * @param response - The VS Code Language Model chat response - * @returns Promise resolving to processed content and tool calls - */ -export async function processLanguageModelResponse( - response: vscode.LanguageModelChatResponse -): Promise { - let content = ''; - const toolCalls: OpenAIToolCall[] = []; - - for await (const part of response.stream) { - if (isToolCallPart(part)) { - const toolCall: OpenAIToolCall = { - id: part.callId, - type: 'function', - function: { - name: part.name, - arguments: JSON.stringify(part.input) - } - }; - toolCalls.push(toolCall); - } else if (isTextPart(part)) { - content += extractTextContent(part); - } - } - - const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop'; - - return { - content, - toolCalls, - finishReason - }; -} - -/** - * Creates an OpenAI-compatible response message - * @param data - The processed response data - * @param requestBody - Original request body for backward compatibility - * @returns OpenAI message object - */ -export function createOpenAIMessage( - data: ProcessedResponseData, - requestBody?: ChatCompletionRequest -): OpenAIMessage { - const baseMessage = { - role: 'assistant' as const, - content: data.toolCalls.length > 0 ? 
null : data.content, - }; - - // Add tool_calls if present - if (data.toolCalls.length > 0) { - const messageWithTools = { - ...baseMessage, - tool_calls: data.toolCalls, - }; - - // For backward compatibility, also add function_call if there's exactly one tool call - if (data.toolCalls.length === 1 && requestBody?.function_call !== undefined) { - return { - ...messageWithTools, - function_call: { - name: data.toolCalls[0].function.name, - arguments: data.toolCalls[0].function.arguments - } - }; - } - - return messageWithTools; - } - - return baseMessage; -} - -/** - * Sends a complete (non-streaming) OpenAI-compatible response - * @param res - HTTP response object - * @param context - Chat completion context - * @param data - Processed response data - * @param requestBody - Original request body - */ -export function sendCompletionResponse( - res: ServerResponse, - context: ChatCompletionContext, - data: ProcessedResponseData, - requestBody?: ChatCompletionRequest -): void { - const message = createOpenAIMessage(data, requestBody); - - const responseObj: OpenAIResponse = { - id: context.requestId, - object: 'chat.completion', - created: context.created, - model: context.modelName, - choices: [{ - index: 0, - message, - finish_reason: data.finishReason, - }], - usage: { - prompt_tokens: 0, // VS Code API doesn't provide token counts - completion_tokens: 0, - total_tokens: 0 - } - }; - - verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`); - writeJson(res, 200, responseObj); -} - -/** - * Type guard for VS Code LanguageModelToolCallPart - */ -function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart { - return part !== null && - typeof part === 'object' && - 'callId' in part && - 'name' in part && - 'input' in part; -} - -/** - * Type guard for text content parts - */ -function isTextPart(part: unknown): boolean { - return typeof part === 'string' || - (part !== null && typeof part === 'object' && 'value' in part); -} - -/** - * Extracts text content from various part types - */ -function extractTextContent(part: unknown): string { - if (typeof part === 'string') { - return part; - } - - if (part !== null && typeof part === 'object' && 'value' in part) { - return String((part as { value: unknown }).value) || ''; - } - - return ''; -} \ No newline at end of file diff --git a/src/services/streaming-handler.ts b/src/services/streaming-handler.ts deleted file mode 100644 index a972027..0000000 --- a/src/services/streaming-handler.ts +++ /dev/null @@ -1,190 +0,0 @@ -import type * as vscode from 'vscode'; -import type { ServerResponse } from 'http'; -import type { - OpenAIResponse, - OpenAIToolCall, - ChatCompletionContext -} from '../types/openai-types'; -import type { ChatCompletionRequest } from '../messages'; -import { verbose } from '../log'; - -/** - * Handles Server-Sent Events streaming for OpenAI-compatible chat completions - */ -export class StreamingResponseHandler { - private readonly response: ServerResponse; - private readonly context: ChatCompletionContext; - private readonly requestBody?: ChatCompletionRequest; - - constructor( - response: ServerResponse, - context: ChatCompletionContext, - requestBody?: ChatCompletionRequest - ) { - this.response = response; - this.context = context; - this.requestBody = requestBody; - } - - /** - * Initializes the SSE stream with proper headers - */ - public initializeStream(): void { - this.response.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 
'no-cache', - 'Connection': 'keep-alive', - }); - - verbose(`SSE start id=${this.context.requestId}`); - } - - /** - * Processes the Language Model response stream and sends SSE chunks - * @param languageModelResponse - VS Code Language Model response - */ - public async processAndStreamResponse( - languageModelResponse: vscode.LanguageModelChatResponse - ): Promise { - const toolCalls: OpenAIToolCall[] = []; - - for await (const part of languageModelResponse.stream) { - if (this.isToolCallPart(part)) { - const toolCall = this.createToolCallFromPart(part); - toolCalls.push(toolCall); - this.sendToolCallChunk(toolCall); - } else if (this.isTextPart(part)) { - const content = this.extractTextContent(part); - if (content) { - this.sendContentChunk(content); - } - } - } - - this.sendFinalChunk(toolCalls.length > 0 ? 'tool_calls' : 'stop'); - this.endStream(); - } - - /** - * Sends a content delta chunk - */ - private sendContentChunk(content: string): void { - const chunkResponse: OpenAIResponse = { - id: this.context.requestId, - object: 'chat.completion.chunk', - created: this.context.created, - model: this.context.modelName, - choices: [{ - index: 0, - delta: { content }, - finish_reason: null - }] - }; - - this.writeSSEData(chunkResponse); - } - - /** - * Sends a tool call chunk - */ - private sendToolCallChunk(toolCall: OpenAIToolCall): void { - const chunkResponse: OpenAIResponse = { - id: this.context.requestId, - object: 'chat.completion.chunk', - created: this.context.created, - model: this.context.modelName, - choices: [{ - index: 0, - delta: { - tool_calls: [toolCall] - }, - finish_reason: null - }] - }; - - this.writeSSEData(chunkResponse); - } - - /** - * Sends the final completion chunk with finish reason - */ - private sendFinalChunk(finishReason: 'stop' | 'tool_calls'): void { - const finalChunkResponse: OpenAIResponse = { - id: this.context.requestId, - object: 'chat.completion.chunk', - created: this.context.created, - model: this.context.modelName, - choices: [{ - index: 0, - delta: {}, - finish_reason: finishReason - }] - }; - - this.writeSSEData(finalChunkResponse); - } - - /** - * Ends the SSE stream - */ - private endStream(): void { - verbose(`SSE end id=${this.context.requestId}`); - this.response.write('data: [DONE]\n\n'); - this.response.end(); - } - - /** - * Writes data to the SSE stream - */ - private writeSSEData(data: OpenAIResponse): void { - this.response.write(`data: ${JSON.stringify(data)}\n\n`); - } - - /** - * Creates an OpenAI tool call from VS Code Language Model part - */ - private createToolCallFromPart(part: vscode.LanguageModelToolCallPart): OpenAIToolCall { - return { - id: part.callId, - type: 'function', - function: { - name: part.name, - arguments: JSON.stringify(part.input) - } - }; - } - - /** - * Type guard for VS Code LanguageModelToolCallPart - */ - private isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart { - return part !== null && - typeof part === 'object' && - 'callId' in part && - 'name' in part && - 'input' in part; - } - - /** - * Type guard for text content parts - */ - private isTextPart(part: unknown): boolean { - return typeof part === 'string' || - (part !== null && typeof part === 'object' && 'value' in part); - } - - /** - * Extracts text content from various part types - */ - private extractTextContent(part: unknown): string { - if (typeof part === 'string') { - return part; - } - - if (part !== null && typeof part === 'object' && 'value' in part) { - return String((part as { value: unknown }).value) || 
''; - } - - return ''; - } -} \ No newline at end of file diff --git a/src/status.ts b/src/status.ts index 2726b41..d519cde 100644 --- a/src/status.ts +++ b/src/status.ts @@ -23,9 +23,9 @@ export const updateStatus = (kind: BridgeStatusKind): void => { switch (kind) { case 'start': { - const availability = state.modelCache ? 'OK' : (state.modelAttempted ? 'Unavailable' : 'Pending'); - state.statusBarItem.text = `Copilot Bridge: ${availability} @ ${shown}`; - info(`Started at http://${shown} | Copilot: ${state.modelCache ? 'ok' : (state.modelAttempted ? 'unavailable' : 'pending')}`); + const availability = state.modelCache ? 'OK' : (state.modelAttempted ? 'Unavailable' : 'Pending'); + state.statusBarItem.text = `Copilot Bridge: ${availability} @ ${shown}`; + info(`Started at http://${shown} | Copilot: ${state.modelCache ? 'ok' : (state.modelAttempted ? 'unavailable' : 'pending')}`); break; } case 'error': diff --git a/src/types/openai-types.ts b/src/types/openai-types.ts index 130e406..65be142 100644 --- a/src/types/openai-types.ts +++ b/src/types/openai-types.ts @@ -1,6 +1,3 @@ -import type * as vscode from 'vscode'; -import type { Tool } from '../messages'; - /** * OpenAI API compatible types for request and response handling */ @@ -56,26 +53,4 @@ export interface ProcessedResponseData { readonly content: string; readonly toolCalls: OpenAIToolCall[]; readonly finishReason: OpenAIChoice['finish_reason']; -} - -/** - * Validates that the request model is available and properly configured - */ -export interface ModelValidationResult { - readonly isValid: boolean; - readonly statusCode?: number; - readonly errorType?: string; - readonly errorCode?: string; - readonly reason?: string; -} - -/** - * Consolidated request processing context for chat completions - */ -export interface RequestProcessingContext { - readonly model: vscode.LanguageModelChat; - readonly lmMessages: vscode.LanguageModelChatMessage[]; - readonly lmTools: vscode.LanguageModelChatTool[]; - readonly requestOptions: vscode.LanguageModelChatRequestOptions; - readonly mergedTools: Tool[]; } \ No newline at end of file