diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..b7978f2 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,5 @@ +Copilot instructions + +Look carefully through [AGENTS.md](../AGENTS.md) for a description of the project and how to contribute. + +Follow instructions carefully. \ No newline at end of file diff --git a/.github/instructions/ts.instructions.md b/.github/instructions/ts.instructions.md new file mode 100644 index 0000000..e3e56a1 --- /dev/null +++ b/.github/instructions/ts.instructions.md @@ -0,0 +1,134 @@ +--- +description: 'Guidelines for TypeScript Development targeting TypeScript 5.x and ES2022 output' +applyTo: '**/*.ts' +--- + +# TypeScript Development + +> These instructions assume projects are built with TypeScript 5.x (or newer) compiling to an ES2022 JavaScript baseline. Adjust guidance if your runtime requires older language targets or down-level transpilation. + +## Core Intent + +- Respect the existing architecture and coding standards. +- Prefer readable, explicit solutions over clever shortcuts. +- Extend current abstractions before inventing new ones. +- Prioritize maintainability and clarity, short methods and classes, clean code. + +## Programming Language: TypeScript + +**TypeScript Best Practices:** +- Use strict TypeScript configuration with `"strict": true` +- Prefer interfaces over type aliases for object shapes +- Use explicit return types for all public functions +- Avoid `any` type - use `unknown` or proper typing instead +- Use utility types (Pick, Omit, Partial) for type transformations +- Implement proper null/undefined checking + +## Code Style: Clean Code + +**Clean Code Principles:** +- Write self-documenting code with meaningful names +- Keep functions small and focused on a single responsibility +- Avoid deep nesting and complex conditional statements +- Use consistent formatting and indentation +- Write code that tells a story and is easy to understand +- Refactor ruthlessly to eliminate code smells + +## General Guardrails + +- Target TypeScript 5.x / ES2022 and prefer native features over polyfills. +- Use pure ES modules; never emit `require`, `module.exports`, or CommonJS helpers. +- Rely on the project's build, lint, and test scripts unless asked otherwise. +- Note design trade-offs when intent is not obvious. + +## Project Organization + +- Follow the repository's folder and responsibility layout for new code. +- Use kebab-case filenames (e.g., `user-session.ts`, `data-service.ts`) unless told otherwise. +- Keep tests, types, and helpers near their implementation when it aids discovery. +- Reuse or extend shared utilities before adding new ones. + +## Naming & Style + +- Use PascalCase for classes, interfaces, enums, and type aliases; camelCase for everything else. +- Skip interface prefixes like `I`; rely on descriptive names. +- Name things for their behavior or domain meaning, not implementation. + +## Formatting & Style + +- Run the repository's lint/format scripts (e.g., `npm run lint`) before submitting. +- Match the project's indentation, quote style, and trailing comma rules. +- Keep functions focused; extract helpers when logic branches grow. +- Favor immutable data and pure functions when practical. + +## Type System Expectations + +- Avoid `any` (implicit or explicit); prefer `unknown` plus narrowing. +- Use discriminated unions for realtime events and state machines. +- Centralize shared contracts instead of duplicating shapes. 
+- Express intent with TypeScript utility types (e.g., `Readonly`, `Partial`, `Record`). + +## Async, Events & Error Handling + +- Use `async/await`; wrap awaits in try/catch with structured errors. +- Guard edge cases early to avoid deep nesting. +- Send errors through the project's logging/telemetry utilities. +- Surface user-facing errors via the repository's notification pattern. +- Debounce configuration-driven updates and dispose resources deterministically. + +## Architecture & Patterns + +- Follow the repository's dependency injection or composition pattern; keep modules single-purpose. +- Observe existing initialization and disposal sequences when wiring into lifecycles. +- Keep transport, domain, and presentation layers decoupled with clear interfaces. +- Supply lifecycle hooks (e.g., `initialize`, `dispose`) and targeted tests when adding services. + +## External Integrations + +- Instantiate clients outside hot paths and inject them for testability. +- Never hardcode secrets; load them from secure sources. +- Apply retries, backoff, and cancellation to network or IO calls. +- Normalize external responses and map errors to domain shapes. + +## Security Practices + +- Validate and sanitize external input with schema validators or type guards. +- Avoid dynamic code execution and untrusted template rendering. +- Encode untrusted content before rendering HTML; use framework escaping or trusted types. +- Use parameterized queries or prepared statements to block injection. +- Keep secrets in secure storage, rotate them regularly, and request least-privilege scopes. +- Favor immutable flows and defensive copies for sensitive data. +- Use vetted crypto libraries only. +- Patch dependencies promptly and monitor advisories. + +## Configuration & Secrets + +- Reach configuration through shared helpers and validate with schemas or dedicated validators. +- Handle secrets via the project's secure storage; guard `undefined` and error states. +- Document new configuration keys and update related tests. + +## UI & UX Components + +- Sanitize user or external content before rendering. +- Keep UI layers thin; push heavy logic to services or state managers. +- Use messaging or events to decouple UI from business logic. + +## Testing Expectations + +- Add or update unit tests with the project's framework and naming style. +- Expand integration or end-to-end suites when behavior crosses modules or platform APIs. +- Run targeted test scripts for quick feedback before submitting. +- Avoid brittle timing assertions; prefer fake timers or injected clocks. + +## Performance & Reliability + +- Lazy-load heavy dependencies and dispose them when done. +- Defer expensive work until users need it. +- Batch or debounce high-frequency events to reduce thrash. +- Track resource lifetimes to prevent leaks. + +## Documentation & Comments + +- Add JSDoc to public APIs; include `@remarks` or `@example` when helpful. +- Write comments that capture intent, and remove stale notes during refactors. +- Update architecture or design docs when introducing significant patterns. 
\ No newline at end of file diff --git a/package.json b/package.json index 7cb4887..c081009 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "name": "copilot-bridge", "displayName": "Copilot Bridge", "description": "Local OpenAI-compatible chat endpoint (inference) bridging to GitHub Copilot via the VS Code Language Model API.", - "version": "0.2.2", + "version": "1.0.0", "publisher": "thinkability", "repository": { "type": "git", diff --git a/src/http/routes/chat.ts b/src/http/routes/chat.ts index c1dae65..a450482 100644 --- a/src/http/routes/chat.ts +++ b/src/http/routes/chat.ts @@ -1,14 +1,20 @@ import * as vscode from 'vscode'; import type { IncomingMessage, ServerResponse } from 'http'; import { state } from '../../state'; -import { getBridgeConfig } from '../../config'; -import { isChatCompletionRequest, normalizeMessagesLM } from '../../messages'; -import { getModel, hasLMApi } from '../../models'; -import { readJson, writeErrorResponse, writeJson } from '../utils'; +import { isChatCompletionRequest, type ChatCompletionRequest } from '../../messages'; +import { readJson, writeErrorResponse } from '../utils'; import { verbose } from '../../log'; +import { ModelService } from '../../services/model-service'; +import { StreamingResponseHandler } from '../../services/streaming-handler'; +import { processLanguageModelResponse, sendCompletionResponse } from '../../services/response-formatter'; +import type { ChatCompletionContext } from '../../types/openai-types'; -export const handleChatCompletion = async (req: IncomingMessage, res: ServerResponse): Promise => { - const config = getBridgeConfig(); +/** + * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling + * @param req - HTTP request object + * @param res - HTTP response object + */ +export async function handleChatCompletion(req: IncomingMessage, res: ServerResponse): Promise { state.activeRequests++; verbose(`Request started (active=${state.activeRequests})`); @@ -18,69 +24,75 @@ export const handleChatCompletion = async (req: IncomingMessage, res: ServerResp return writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload'); } - const requestedModel = body.model; - const stream = body.stream !== false; // default true - const model = await getModel(false, requestedModel); - - if (!model) { - const hasLM = hasLMApi(); - if (requestedModel && hasLM) { - state.lastReason = 'not_found'; - return writeErrorResponse(res, 404, 'model not found', 'invalid_request_error', 'model_not_found', 'not_found'); - } - const reason = !hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable'); - return writeErrorResponse(res, 503, 'Copilot unavailable', 'server_error', 'copilot_unavailable', reason); + const modelService = new ModelService(); + + // Validate model availability + const modelValidation = await modelService.validateModel(body.model); + if (!modelValidation.isValid) { + const errorMessage = body.model ? 
'model not found' : 'Copilot unavailable'; + return writeErrorResponse( + res, + modelValidation.statusCode!, + errorMessage, + modelValidation.errorType!, + modelValidation.errorCode!, + modelValidation.reason || 'unknown_error' + ); } - const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow) as vscode.LanguageModelChatMessage[]; - verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'}`); + // Create processing context + const context = await modelService.createProcessingContext(body); + const chatContext = modelService.createChatCompletionContext(body, context.lmTools.length > 0); + + verbose(`LM request via API model=${context.model.family || context.model.id || context.model.name || 'unknown'} tools=${context.lmTools.length}`); - const cts = new vscode.CancellationTokenSource(); - const response = await model.sendRequest(lmMessages, {}, cts.token); - await sendResponse(res, response, stream); - } catch (e) { - const msg = e instanceof Error ? e.message : String(e); - writeErrorResponse(res, 500, msg || 'internal_error', 'server_error', 'internal_error'); + // Execute the Language Model request + const cancellationToken = new vscode.CancellationTokenSource(); + const response = await context.model.sendRequest( + context.lmMessages, + context.requestOptions, + cancellationToken.token + ); + + // Handle response based on streaming preference + if (chatContext.isStreaming) { + await handleStreamingResponse(res, response, chatContext, body); + } else { + await handleNonStreamingResponse(res, response, chatContext, body); + } + + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + writeErrorResponse(res, 500, errorMessage || 'internal_error', 'server_error', 'internal_error'); } finally { state.activeRequests--; verbose(`Request complete (active=${state.activeRequests})`); } -}; +} -const sendResponse = async (res: ServerResponse, response: vscode.LanguageModelChatResponse, stream: boolean): Promise => { - if (stream) { - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - }); - const id = `cmp_${Math.random().toString(36).slice(2)}`; - verbose(`SSE start id=${id}`); - for await (const fragment of response.text) { - res.write(`data: ${JSON.stringify({ - id, - object: 'chat.completion.chunk', - choices: [{ index: 0, delta: { content: fragment } }], - })}\n\n`); - } - verbose(`SSE end id=${id}`); - res.write('data: [DONE]\n\n'); - res.end(); - return; - } +/** + * Handles streaming response using Server-Sent Events + */ +async function handleStreamingResponse( + res: ServerResponse, + response: vscode.LanguageModelChatResponse, + chatContext: ChatCompletionContext, + requestBody: ChatCompletionRequest +): Promise { + const streamHandler = new StreamingResponseHandler(res, chatContext, requestBody); + streamHandler.initializeStream(); + await streamHandler.processAndStreamResponse(response); +} - let content = ''; - for await (const fragment of response.text) content += fragment; - verbose(`Non-stream complete len=${content.length}`); - writeJson(res, 200, { - id: `cmpl_${Math.random().toString(36).slice(2)}`, - object: 'chat.completion', - choices: [ - { - index: 0, - message: { role: 'assistant', content }, - finish_reason: 'stop', - }, - ], - }); -}; +/** + * Handles non-streaming response with complete data + */ +async function handleNonStreamingResponse( + res: ServerResponse, + response: 
vscode.LanguageModelChatResponse, + chatContext: ChatCompletionContext, + requestBody: ChatCompletionRequest +): Promise { + const processedData = await processLanguageModelResponse(response); + sendCompletionResponse(res, chatContext, processedData, requestBody); +} diff --git a/src/http/routes/health.ts b/src/http/routes/health.ts index 87eb9a8..2da6ba8 100644 --- a/src/http/routes/health.ts +++ b/src/http/routes/health.ts @@ -5,19 +5,57 @@ import { hasLMApi, getModel } from '../../models'; import { state } from '../../state'; import { verbose } from '../../log'; +interface HealthResponse { + readonly ok: boolean; + readonly status: string; + readonly copilot: string; + readonly reason?: string; + readonly version: string; + readonly features: { + readonly chat_completions: boolean; + readonly streaming: boolean; + readonly tool_calling: boolean; + readonly function_calling: boolean; + readonly models_list: boolean; + }; + readonly active_requests: number; + readonly model_attempted?: boolean; +} + export const handleHealthCheck = async (res: ServerResponse, v: boolean): Promise => { const hasLM = hasLMApi(); + + // Attempt model resolution if cache is empty and verbose logging is enabled if (!state.modelCache && v) { verbose(`Healthz: model=${state.modelCache ? 'present' : 'missing'} lmApi=${hasLM ? 'ok' : 'missing'}`); - await getModel(); + try { + await getModel(); + } catch (e) { + const msg = e instanceof Error ? e.message : String(e); + verbose(`Health check model resolution failed: ${msg}`); + } } + const unavailableReason = state.modelCache ? undefined : (!hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable')); - writeJson(res, 200, { + + const response: HealthResponse = { ok: true, + status: 'operational', copilot: state.modelCache ? 
'ok' : 'unavailable', reason: unavailableReason, version: vscode.version, - }); + features: { + chat_completions: true, + streaming: true, + tool_calling: true, + function_calling: true, // deprecated but supported + models_list: true + }, + active_requests: state.activeRequests, + model_attempted: state.modelAttempted + }; + + writeJson(res, 200, response); }; diff --git a/src/http/routes/models.ts b/src/http/routes/models.ts index 10ea4e9..039c0ad 100644 --- a/src/http/routes/models.ts +++ b/src/http/routes/models.ts @@ -1,20 +1,47 @@ -import { writeJson } from '../utils'; +import { writeJson, writeErrorResponse } from '../utils'; import { listCopilotModels } from '../../models'; +import { verbose } from '../../log'; import type { ServerResponse } from 'http'; +interface ModelObject { + readonly id: string; + readonly object: 'model'; + readonly created: number; + readonly owned_by: string; + readonly permission: readonly unknown[]; + readonly root: string; + readonly parent: null; +} + +interface ModelsListResponse { + readonly object: 'list'; + readonly data: readonly ModelObject[]; +} + export const handleModelsRequest = async (res: ServerResponse): Promise => { try { - const models = await listCopilotModels(); - writeJson(res, 200, { - data: models.map((id: string) => ({ - id, - object: 'model', - owned_by: 'vscode-bridge', - })), - }); - } catch { - writeJson(res, 200, { - data: [], - }); + const modelIds = await listCopilotModels(); + verbose(`Models listed: ${modelIds.length} available`); + + const models: ModelObject[] = modelIds.map((id: string) => ({ + id, + object: 'model' as const, + created: Math.floor(Date.now() / 1000), + owned_by: 'copilot', + permission: [], + root: id, + parent: null, + })); + + const response: ModelsListResponse = { + object: 'list', + data: models, + }; + + writeJson(res, 200, response); + } catch (e) { + const msg = e instanceof Error ? 
e.message : String(e); + verbose(`Models request failed: ${msg}`); + writeErrorResponse(res, 500, msg || 'Failed to list models', 'server_error', 'internal_error'); } }; diff --git a/src/messages.ts b/src/messages.ts index ac9d2c8..1ee8f1b 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -1,8 +1,12 @@ import * as vscode from 'vscode'; export interface ChatMessage { - readonly role: 'system' | 'user' | 'assistant'; - readonly content: string | MessageContent[]; + readonly role: 'system' | 'user' | 'assistant' | 'tool'; + readonly content?: string | MessageContent[] | null; + readonly name?: string; + readonly tool_calls?: ToolCall[]; + readonly tool_call_id?: string; + readonly function_call?: FunctionCall; } export interface MessageContent { @@ -11,22 +15,87 @@ export interface MessageContent { readonly [key: string]: unknown; } +export interface ToolCall { + readonly id: string; + readonly type: 'function'; + readonly function: FunctionCall; +} + +export interface FunctionCall { + readonly name: string; + readonly arguments: string; +} + +export interface Tool { + readonly type: 'function'; + readonly function: ToolFunction; +} + +export interface ToolFunction { + readonly name: string; + readonly description?: string; + readonly parameters?: object; +} + export interface ChatCompletionRequest { readonly model?: string; readonly messages: ChatMessage[]; readonly stream?: boolean; + readonly tools?: Tool[]; + readonly tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } }; + readonly parallel_tool_calls?: boolean; + readonly functions?: ToolFunction[]; // Deprecated, use tools instead + readonly function_call?: 'none' | 'auto' | { name: string }; // Deprecated, use tool_choice instead + readonly temperature?: number; + readonly top_p?: number; + readonly n?: number; + readonly stop?: string | string[]; + readonly max_tokens?: number; + readonly max_completion_tokens?: number; + readonly presence_penalty?: number; + readonly frequency_penalty?: number; + readonly logit_bias?: Record; + readonly logprobs?: boolean; + readonly top_logprobs?: number; + readonly user?: string; + readonly seed?: number; + readonly response_format?: { + readonly type: 'text' | 'json_object' | 'json_schema'; + readonly json_schema?: { + readonly name: string; + readonly schema: object; + readonly strict?: boolean; + }; + }; readonly [key: string]: unknown; } -const VALID_ROLES = ['system', 'user', 'assistant'] as const; +const VALID_ROLES = ['system', 'user', 'assistant', 'tool'] as const; type Role = typeof VALID_ROLES[number]; const isValidRole = (role: unknown): role is Role => typeof role === 'string' && VALID_ROLES.includes(role as Role); export const isChatMessage = (msg: unknown): msg is ChatMessage => { if (typeof msg !== 'object' || msg === null) return false; const candidate = msg as Record; - if (!('role' in candidate) || !('content' in candidate)) return false; - return isValidRole(candidate.role) && candidate.content !== undefined && candidate.content !== null; + if (!('role' in candidate)) return false; + if (!isValidRole(candidate.role)) return false; + + // Tool messages require tool_call_id and content + if (candidate.role === 'tool') { + return typeof candidate.tool_call_id === 'string' && + (typeof candidate.content === 'string' || candidate.content === null); + } + + // Assistant messages can have content and/or tool_calls/function_call + if (candidate.role === 'assistant') { + const hasContent = candidate.content !== undefined; + const hasToolCalls = 
Array.isArray(candidate.tool_calls); + const hasFunctionCall = typeof candidate.function_call === 'object' && candidate.function_call !== null; + return hasContent || hasToolCalls || hasFunctionCall; + } + + // System and user messages must have content + return candidate.content !== undefined && candidate.content !== null; }; export const isChatCompletionRequest = (body: unknown): body is ChatCompletionRequest => { @@ -37,6 +106,25 @@ export const isChatCompletionRequest = (body: unknown): body is ChatCompletionRe return Array.isArray(messages) && messages.length > 0 && messages.every(isChatMessage); }; +// Convert OpenAI tools to VS Code Language Model tools +export const convertOpenAIToolsToLM = (tools?: Tool[]): vscode.LanguageModelChatTool[] => { + if (!tools) return []; + return tools.map(tool => ({ + name: tool.function.name, + description: tool.function.description || '', + inputSchema: tool.function.parameters + })); +}; + +// Convert deprecated functions to tools format +export const convertFunctionsToTools = (functions?: ToolFunction[]): Tool[] => { + if (!functions) return []; + return functions.map(func => ({ + type: 'function' as const, + function: func + })); +}; + const toText = (content: unknown): string => { if (typeof content === 'string') return content; if (Array.isArray(content)) return content.map(toText).join('\n'); diff --git a/src/models.ts b/src/models.ts index 40d78d4..d7174e9 100644 --- a/src/models.ts +++ b/src/models.ts @@ -65,7 +65,8 @@ export const handleModelSelectionError = (error: unknown, family?: string): void export const listCopilotModels = async (): Promise => { try { - const models = await selectChatModels(); + // Filter for Copilot models only, consistent with getModel behavior + const models = await vscode.lm.selectChatModels({ vendor: 'copilot' }); const ids = models.map((m: vscode.LanguageModelChat) => { const normalized = m.family || m.id || m.name || 'copilot'; return `${normalized}`; diff --git a/src/services/model-service.ts b/src/services/model-service.ts new file mode 100644 index 0000000..562f899 --- /dev/null +++ b/src/services/model-service.ts @@ -0,0 +1,99 @@ +import type * as vscode from 'vscode'; +import type { ChatCompletionRequest } from '../messages'; +import type { + ModelValidationResult, + RequestProcessingContext, + ChatCompletionContext +} from '../types/openai-types'; +import { + extractAndMergeTools, + createLanguageModelRequestOptions +} from './request-processor'; +import { getModel, hasLMApi } from '../models'; +import { normalizeMessagesLM, convertOpenAIToolsToLM } from '../messages'; +import { getBridgeConfig } from '../config'; + +/** + * Service for validating models and creating request processing context + */ +export class ModelService { + + /** + * Validates the requested model and returns appropriate error details if invalid + * @param requestedModel - The model identifier from the request + * @returns Validation result with error details if model is unavailable + */ + public async validateModel(requestedModel?: string): Promise { + const model = await getModel(false, requestedModel); + + if (!model) { + const hasLM = hasLMApi(); + + if (requestedModel && hasLM) { + return { + isValid: false, + statusCode: 404, + errorType: 'invalid_request_error', + errorCode: 'model_not_found', + reason: 'not_found' + }; + } + + const reason = !hasLM ? 
'missing_language_model_api' : 'copilot_model_unavailable'; + return { + isValid: false, + statusCode: 503, + errorType: 'server_error', + errorCode: 'copilot_unavailable', + reason + }; + } + + return { isValid: true }; + } + + /** + * Creates a complete request processing context from validated inputs + * @param body - The validated chat completion request + * @returns Processing context with all required elements for the Language Model API + */ + public async createProcessingContext(body: ChatCompletionRequest): Promise { + const model = await getModel(false, body.model); + if (!model) { + throw new Error('Model validation should be performed before creating processing context'); + } + + const config = getBridgeConfig(); + const mergedTools = extractAndMergeTools(body); + const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow); + const lmTools = convertOpenAIToolsToLM(mergedTools); + const requestOptions = createLanguageModelRequestOptions(lmTools); + + return { + model, + lmMessages: lmMessages as vscode.LanguageModelChatMessage[], + lmTools, + requestOptions, + mergedTools + }; + } + + /** + * Creates chat completion context for response formatting + * @param body - The chat completion request + * @param hasTools - Whether tools are present in the request + * @returns Context object for response handling + */ + public createChatCompletionContext( + body: ChatCompletionRequest, + hasTools: boolean + ): ChatCompletionContext { + return { + requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`, + modelName: body.model || 'copilot', + created: Math.floor(Date.now() / 1000), + hasTools, + isStreaming: body.stream !== false + }; + } +} \ No newline at end of file diff --git a/src/services/request-processor.ts b/src/services/request-processor.ts new file mode 100644 index 0000000..ccdcf82 --- /dev/null +++ b/src/services/request-processor.ts @@ -0,0 +1,39 @@ +import type { ChatCompletionRequest, Tool } from '../messages'; +import type * as vscode from 'vscode'; + +/** + * Validates and extracts tool configurations from request body + * @param body - The parsed request body + * @returns Combined tools array including converted deprecated functions + */ +export function extractAndMergeTools(body: ChatCompletionRequest): Tool[] { + const tools = body.tools || []; + + if (body.functions) { + // Convert deprecated functions to tools format + const convertedTools: Tool[] = body.functions.map(func => ({ + type: 'function' as const, + function: func + })); + return [...tools, ...convertedTools]; + } + + return tools; +} + +/** + * Creates VS Code Language Model request options from processed context + * @param lmTools - Array of Language Model compatible tools + * @returns Request options object for the Language Model API + */ +export function createLanguageModelRequestOptions( + lmTools: vscode.LanguageModelChatTool[] +): vscode.LanguageModelChatRequestOptions { + const options: vscode.LanguageModelChatRequestOptions = {}; + + if (lmTools.length > 0) { + options.tools = lmTools; + } + + return options; +} \ No newline at end of file diff --git a/src/services/response-formatter.ts b/src/services/response-formatter.ts new file mode 100644 index 0000000..dce7f4a --- /dev/null +++ b/src/services/response-formatter.ts @@ -0,0 +1,158 @@ +import type * as vscode from 'vscode'; +import type { ServerResponse } from 'http'; +import type { + OpenAIResponse, + OpenAIChoice, + OpenAIMessage, + OpenAIToolCall, + ChatCompletionContext, + ProcessedResponseData +} from 
'../types/openai-types'; +import type { ChatCompletionRequest } from '../messages'; +import { writeJson } from '../http/utils'; +import { verbose } from '../log'; + +/** + * Processes VS Code Language Model stream parts into structured data + * @param response - The VS Code Language Model chat response + * @returns Promise resolving to processed content and tool calls + */ +export async function processLanguageModelResponse( + response: vscode.LanguageModelChatResponse +): Promise { + let content = ''; + const toolCalls: OpenAIToolCall[] = []; + + for await (const part of response.stream) { + if (isToolCallPart(part)) { + const toolCall: OpenAIToolCall = { + id: part.callId, + type: 'function', + function: { + name: part.name, + arguments: JSON.stringify(part.input) + } + }; + toolCalls.push(toolCall); + } else if (isTextPart(part)) { + content += extractTextContent(part); + } + } + + const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop'; + + return { + content, + toolCalls, + finishReason + }; +} + +/** + * Creates an OpenAI-compatible response message + * @param data - The processed response data + * @param requestBody - Original request body for backward compatibility + * @returns OpenAI message object + */ +export function createOpenAIMessage( + data: ProcessedResponseData, + requestBody?: ChatCompletionRequest +): OpenAIMessage { + const baseMessage = { + role: 'assistant' as const, + content: data.toolCalls.length > 0 ? null : data.content, + }; + + // Add tool_calls if present + if (data.toolCalls.length > 0) { + const messageWithTools = { + ...baseMessage, + tool_calls: data.toolCalls, + }; + + // For backward compatibility, also add function_call if there's exactly one tool call + if (data.toolCalls.length === 1 && requestBody?.function_call !== undefined) { + return { + ...messageWithTools, + function_call: { + name: data.toolCalls[0].function.name, + arguments: data.toolCalls[0].function.arguments + } + }; + } + + return messageWithTools; + } + + return baseMessage; +} + +/** + * Sends a complete (non-streaming) OpenAI-compatible response + * @param res - HTTP response object + * @param context - Chat completion context + * @param data - Processed response data + * @param requestBody - Original request body + */ +export function sendCompletionResponse( + res: ServerResponse, + context: ChatCompletionContext, + data: ProcessedResponseData, + requestBody?: ChatCompletionRequest +): void { + const message = createOpenAIMessage(data, requestBody); + + const responseObj: OpenAIResponse = { + id: context.requestId, + object: 'chat.completion', + created: context.created, + model: context.modelName, + choices: [{ + index: 0, + message, + finish_reason: data.finishReason, + }], + usage: { + prompt_tokens: 0, // VS Code API doesn't provide token counts + completion_tokens: 0, + total_tokens: 0 + } + }; + + verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`); + writeJson(res, 200, responseObj); +} + +/** + * Type guard for VS Code LanguageModelToolCallPart + */ +function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart { + return part !== null && + typeof part === 'object' && + 'callId' in part && + 'name' in part && + 'input' in part; +} + +/** + * Type guard for text content parts + */ +function isTextPart(part: unknown): boolean { + return typeof part === 'string' || + (part !== null && typeof part === 'object' && 'value' in part); +} + +/** + * Extracts text content from various 
part types + */ +function extractTextContent(part: unknown): string { + if (typeof part === 'string') { + return part; + } + + if (part !== null && typeof part === 'object' && 'value' in part) { + return String((part as { value: unknown }).value) || ''; + } + + return ''; +} \ No newline at end of file diff --git a/src/services/streaming-handler.ts b/src/services/streaming-handler.ts new file mode 100644 index 0000000..a972027 --- /dev/null +++ b/src/services/streaming-handler.ts @@ -0,0 +1,190 @@ +import type * as vscode from 'vscode'; +import type { ServerResponse } from 'http'; +import type { + OpenAIResponse, + OpenAIToolCall, + ChatCompletionContext +} from '../types/openai-types'; +import type { ChatCompletionRequest } from '../messages'; +import { verbose } from '../log'; + +/** + * Handles Server-Sent Events streaming for OpenAI-compatible chat completions + */ +export class StreamingResponseHandler { + private readonly response: ServerResponse; + private readonly context: ChatCompletionContext; + private readonly requestBody?: ChatCompletionRequest; + + constructor( + response: ServerResponse, + context: ChatCompletionContext, + requestBody?: ChatCompletionRequest + ) { + this.response = response; + this.context = context; + this.requestBody = requestBody; + } + + /** + * Initializes the SSE stream with proper headers + */ + public initializeStream(): void { + this.response.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + }); + + verbose(`SSE start id=${this.context.requestId}`); + } + + /** + * Processes the Language Model response stream and sends SSE chunks + * @param languageModelResponse - VS Code Language Model response + */ + public async processAndStreamResponse( + languageModelResponse: vscode.LanguageModelChatResponse + ): Promise { + const toolCalls: OpenAIToolCall[] = []; + + for await (const part of languageModelResponse.stream) { + if (this.isToolCallPart(part)) { + const toolCall = this.createToolCallFromPart(part); + toolCalls.push(toolCall); + this.sendToolCallChunk(toolCall); + } else if (this.isTextPart(part)) { + const content = this.extractTextContent(part); + if (content) { + this.sendContentChunk(content); + } + } + } + + this.sendFinalChunk(toolCalls.length > 0 ? 
'tool_calls' : 'stop'); + this.endStream(); + } + + /** + * Sends a content delta chunk + */ + private sendContentChunk(content: string): void { + const chunkResponse: OpenAIResponse = { + id: this.context.requestId, + object: 'chat.completion.chunk', + created: this.context.created, + model: this.context.modelName, + choices: [{ + index: 0, + delta: { content }, + finish_reason: null + }] + }; + + this.writeSSEData(chunkResponse); + } + + /** + * Sends a tool call chunk + */ + private sendToolCallChunk(toolCall: OpenAIToolCall): void { + const chunkResponse: OpenAIResponse = { + id: this.context.requestId, + object: 'chat.completion.chunk', + created: this.context.created, + model: this.context.modelName, + choices: [{ + index: 0, + delta: { + tool_calls: [toolCall] + }, + finish_reason: null + }] + }; + + this.writeSSEData(chunkResponse); + } + + /** + * Sends the final completion chunk with finish reason + */ + private sendFinalChunk(finishReason: 'stop' | 'tool_calls'): void { + const finalChunkResponse: OpenAIResponse = { + id: this.context.requestId, + object: 'chat.completion.chunk', + created: this.context.created, + model: this.context.modelName, + choices: [{ + index: 0, + delta: {}, + finish_reason: finishReason + }] + }; + + this.writeSSEData(finalChunkResponse); + } + + /** + * Ends the SSE stream + */ + private endStream(): void { + verbose(`SSE end id=${this.context.requestId}`); + this.response.write('data: [DONE]\n\n'); + this.response.end(); + } + + /** + * Writes data to the SSE stream + */ + private writeSSEData(data: OpenAIResponse): void { + this.response.write(`data: ${JSON.stringify(data)}\n\n`); + } + + /** + * Creates an OpenAI tool call from VS Code Language Model part + */ + private createToolCallFromPart(part: vscode.LanguageModelToolCallPart): OpenAIToolCall { + return { + id: part.callId, + type: 'function', + function: { + name: part.name, + arguments: JSON.stringify(part.input) + } + }; + } + + /** + * Type guard for VS Code LanguageModelToolCallPart + */ + private isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart { + return part !== null && + typeof part === 'object' && + 'callId' in part && + 'name' in part && + 'input' in part; + } + + /** + * Type guard for text content parts + */ + private isTextPart(part: unknown): boolean { + return typeof part === 'string' || + (part !== null && typeof part === 'object' && 'value' in part); + } + + /** + * Extracts text content from various part types + */ + private extractTextContent(part: unknown): string { + if (typeof part === 'string') { + return part; + } + + if (part !== null && typeof part === 'object' && 'value' in part) { + return String((part as { value: unknown }).value) || ''; + } + + return ''; + } +} \ No newline at end of file diff --git a/src/types/openai-types.ts b/src/types/openai-types.ts new file mode 100644 index 0000000..130e406 --- /dev/null +++ b/src/types/openai-types.ts @@ -0,0 +1,81 @@ +import type * as vscode from 'vscode'; +import type { Tool } from '../messages'; + +/** + * OpenAI API compatible types for request and response handling + */ + +export interface OpenAIToolCall { + readonly id: string; + readonly type: 'function'; + readonly function: { + readonly name: string; + readonly arguments: string; + }; +} + +export interface OpenAIMessage { + readonly role: 'assistant'; + readonly content: string | null; + readonly tool_calls?: OpenAIToolCall[]; + readonly function_call?: { + readonly name: string; + readonly arguments: string; + }; +} + +export interface 
OpenAIChoice { + readonly index: number; + readonly message?: OpenAIMessage; + readonly delta?: Partial; + readonly finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null; +} + +export interface OpenAIResponse { + readonly id: string; + readonly object: 'chat.completion' | 'chat.completion.chunk'; + readonly created: number; + readonly model: string; + readonly choices: OpenAIChoice[]; + readonly usage?: { + readonly prompt_tokens: number; + readonly completion_tokens: number; + readonly total_tokens: number; + }; +} + +export interface ChatCompletionContext { + readonly requestId: string; + readonly modelName: string; + readonly created: number; + readonly hasTools: boolean; + readonly isStreaming: boolean; +} + +export interface ProcessedResponseData { + readonly content: string; + readonly toolCalls: OpenAIToolCall[]; + readonly finishReason: OpenAIChoice['finish_reason']; +} + +/** + * Validates that the request model is available and properly configured + */ +export interface ModelValidationResult { + readonly isValid: boolean; + readonly statusCode?: number; + readonly errorType?: string; + readonly errorCode?: string; + readonly reason?: string; +} + +/** + * Consolidated request processing context for chat completions + */ +export interface RequestProcessingContext { + readonly model: vscode.LanguageModelChat; + readonly lmMessages: vscode.LanguageModelChatMessage[]; + readonly lmTools: vscode.LanguageModelChatTool[]; + readonly requestOptions: vscode.LanguageModelChatRequestOptions; + readonly mergedTools: Tool[]; +} \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index a0001ce..fb978bd 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -9,6 +9,7 @@ "sourceMap": true, "esModuleInterop": true, "allowSyntheticDefaultImports": true, + "forceConsistentCasingInFileNames": true, "types": ["node", "vscode"] }, "include": ["src/**/*.ts"]
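
Taken together, the changes above turn the bridge's chat route into a full OpenAI-style tool-calling surface: requests may carry `tools` (or the deprecated `functions`), assistant replies can finish with `tool_calls`, and `tool`-role follow-up messages pass the new validation in `src/messages.ts`. The sketch below shows one possible client round trip under the assumption that the route is exposed at `/v1/chat/completions` on a locally configured port; the base URL, the `get_weather` tool, and the hard-coded weather result are illustrative placeholders, not values defined in this PR.

```typescript
// Hypothetical client-side round trip against the bridge. BASE_URL and the
// /v1/chat/completions path are placeholders (route registration is not part of
// this diff); only the request/response shapes mirror the types added in
// src/messages.ts and src/types/openai-types.ts.
interface ToolCall {
  id: string;
  type: 'function';
  function: { name: string; arguments: string };
}

interface AssistantMessage {
  role: 'assistant';
  content: string | null;
  tool_calls?: ToolCall[];
}

interface Completion {
  choices: Array<{ index: number; message: AssistantMessage; finish_reason: string | null }>;
}

const BASE_URL = 'http://localhost:3000'; // placeholder; use the bridge's configured address

async function chat(body: unknown): Promise<Completion> {
  const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  return (await res.json()) as Completion;
}

async function main(): Promise<void> {
  const tools = [{
    type: 'function',
    function: {
      name: 'get_weather',
      description: 'Look up the current weather for a city',
      parameters: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city'],
      },
    },
  }];
  const question = { role: 'user', content: 'What is the weather in Berlin?' };

  // First turn: omitting `model` lets the bridge pick its default Copilot model;
  // `stream` defaults to true when omitted, so disable it for a plain JSON reply.
  const first = await chat({ stream: false, messages: [question], tools });
  const choice = first.choices[0];

  if (choice.finish_reason === 'tool_calls' && choice.message.tool_calls) {
    const call = choice.message.tool_calls[0];
    const args = JSON.parse(call.function.arguments) as { city: string };

    // Run the tool locally, then send the result back as a 'tool'-role message;
    // tool_call_id is required by the new isChatMessage validation.
    const second = await chat({
      stream: false,
      messages: [
        question,
        choice.message,
        { role: 'tool', tool_call_id: call.id, content: `Sunny, 21°C in ${args.city}` },
      ],
      tools,
    });
    console.log(second.choices[0].message.content);
  } else {
    console.log(choice.message.content);
  }
}

main().catch(console.error);
```

Streaming clients use the same request shape but receive `chat.completion.chunk` objects over Server-Sent Events (see `StreamingResponseHandler`), terminated by a `data: [DONE]` line.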