Mirror of https://github.com/larsbaunwall/vscode-copilot-bridge.git (synced 2025-10-05 22:22:59 +00:00)

Refactor implementation with focus on performance improvement

parent 7e10f69d5f
commit 2dc54a0ea6

16 changed files with 492 additions and 606 deletions
29  .github/copilot-instructions.md (vendored)

@@ -1,5 +1,28 @@
Copilot instructions
## Copilot Usage Notes

Look carefully through [AGENTS.md](../AGENTS.md) for a description of the project and how to contribute.
Always skim [AGENTS.md](../AGENTS.md) before making changes—the document is the single source of truth for architecture, performance targets, and workflow expectations.

Follow instructions carefully.

### Hot-path rules

- Reuse the helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of hand-written JSON responses.
- Preserve the SSE contract in `src/http/routes/chat.ts`: emit role chunk first, follow with `data: { ... }` payloads, and terminate with `data: [DONE]`.
- When streaming, keep `socket.setNoDelay(true)` on the response socket to avoid latency regressions.
- Honor `state.activeRequests` concurrency guard and return early 429s via `writeRateLimit`.
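A minimal sketch of how these hot-path rules compose in a route handler. It is illustrative only: the `state`, `config`, and `writeRateLimit` stand-ins below mirror the names used in the rules above, not the bridge's actual implementations.

```ts
import type { IncomingMessage, ServerResponse } from 'http';

// Hypothetical stand-ins for the bridge's real state/config/helpers (sketch only).
const state = { activeRequests: 0 };
const config = { maxConcurrent: 4 };

const writeRateLimit = (res: ServerResponse): void => {
  res.writeHead(429, { 'Content-Type': 'application/json', 'Retry-After': '1' });
  res.end('{"error":{"message":"too many requests","type":"rate_limit_error","code":"rate_limit_exceeded"}}');
};

export async function chatCompletionsRoute(
  req: IncomingMessage,
  res: ServerResponse,
  handle: (req: IncomingMessage, res: ServerResponse) => Promise<void>,
): Promise<void> {
  // Early 429: reject before any model work once the concurrency cap is reached.
  if (state.activeRequests >= config.maxConcurrent) {
    writeRateLimit(res);
    return;
  }

  // Keep streaming latency low: disable Nagle's algorithm on the response socket.
  res.socket?.setNoDelay(true);

  state.activeRequests++;
  try {
    await handle(req, res);
  } finally {
    state.activeRequests--;
  }
}
```

The early return keeps throttled requests off the model path entirely, which is why the guard sits before any body parsing or model resolution.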
### Tool calling compatibility

- `mergeTools` already merges deprecated `functions`; prefer extending it over new code paths.
- The bridge treats `tool_choice: "required"` like `"auto"` and ignores `parallel_tool_calls`—reflect this limitation in docs if behavior changes.
- Stream tool call deltas using `delta.tool_calls` chunks containing JSON-encoded argument strings. Downstream clients should replace, not append, argument fragments.
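For the last bullet, a small client-side sketch of what "replace, not append" means when consuming `delta.tool_calls` chunks. The `ToolCallDelta` shape is a simplified assumption about the chunk payload, not a definition from this repository.

```ts
// Minimal client-side accumulator for delta.tool_calls chunks.
// The bridge sends complete JSON-encoded argument strings per delta,
// so later fragments replace (not extend) earlier ones.
interface ToolCallDelta {
  id?: string;
  function?: { name?: string; arguments?: string };
}

interface AccumulatedToolCall {
  id: string;
  name: string;
  arguments: string;
}

const toolCalls = new Map<string, AccumulatedToolCall>();

function applyToolCallDelta(delta: ToolCallDelta): void {
  const id = delta.id ?? 'unknown';
  const existing = toolCalls.get(id) ?? { id, name: '', arguments: '' };

  if (delta.function?.name) {
    existing.name = delta.function.name;
  }
  if (delta.function?.arguments !== undefined) {
    // Replace rather than append: each chunk carries the full argument string.
    existing.arguments = delta.function.arguments;
  }
  toolCalls.set(id, existing);
}
```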
### Scope & contracts

- Public endpoints are `/health`, `/v1/models`, `/v1/chat/completions`. Changing contracts requires README updates and a version bump.
- Keep the bridge loopback-only unless a new configuration knob is explicitly approved.
- Update configuration docs when introducing new `bridge.*` settings and run `npm run compile` before handing off changes.

### Workflow

- Plan with the todo-list tool, keep diffs minimal, and avoid formatting unrelated regions.
- Capture limitations or behavior differences (e.g., missing OpenAI response fields) in comments or docs so clients aren’t surprised.
- Summarize reality after each change: what was touched, how it was verified, and any follow-ups.
9  .github/instructions/ts.instructions.md (vendored)

@@ -13,6 +13,7 @@ applyTo: '**/*.ts'
- Prefer readable, explicit solutions over clever shortcuts.
- Extend current abstractions before inventing new ones.
- Prioritize maintainability and clarity, short methods and classes, clean code.
- Keep edits aligned with [AGENTS.md](../../AGENTS.md) and `.github/copilot-instructions.md`.

## Programming Language: TypeScript

@@ -40,6 +41,11 @@ applyTo: '**/*.ts'
- Use pure ES modules; never emit `require`, `module.exports`, or CommonJS helpers.
- Rely on the project's build, lint, and test scripts unless asked otherwise.
- Note design trade-offs when intent is not obvious.
- Reuse the HTTP helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of writing ad-hoc JSON responses.
- Preserve the SSE contract in `src/http/routes/chat.ts`: send the role chunk first, follow with `data: { ... }` payloads, and always terminate with `data: [DONE]`.
- When streaming, call `res.socket?.setNoDelay(true)` before emitting chunks to avoid latency regressions.
- Honor the concurrency guard (`state.activeRequests`) and return early 429 responses via `writeRateLimit` when limits are exceeded.
- Communicate limitations of the VS Code LM API, e.g., `tool_choice: "required"` behaving like `"auto"` and lack of `parallel_tool_calls` support.

## Project Organization
@@ -75,6 +81,7 @@ applyTo: '**/*.ts'
- Send errors through the project's logging/telemetry utilities.
- Surface user-facing errors via the repository's notification pattern.
- Debounce configuration-driven updates and dispose resources deterministically.
- Prefer the pre-serialized error helpers for fast paths and document any new reason codes in README + status handlers.
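A minimal sketch of the debounce-and-dispose rule above, assuming settings live under the `bridge.*` section mentioned elsewhere; `watchBridgeConfig` and `onSettled` are hypothetical names used only for illustration.

```ts
import * as vscode from 'vscode';

/**
 * Illustrative only: debounce configuration-driven work and dispose the timer
 * deterministically when the extension deactivates.
 */
export function watchBridgeConfig(
  context: vscode.ExtensionContext,
  onSettled: () => void,
  delayMs = 250,
): void {
  let timer: NodeJS.Timeout | undefined;

  const subscription = vscode.workspace.onDidChangeConfiguration((event) => {
    if (!event.affectsConfiguration('bridge')) {
      return;
    }
    // Debounce: only react after changes stop arriving for `delayMs`.
    if (timer) {
      clearTimeout(timer);
    }
    timer = setTimeout(onSettled, delayMs);
  });

  context.subscriptions.push(subscription, {
    // Deterministic cleanup: cancel any pending update on dispose.
    dispose: () => {
      if (timer) {
        clearTimeout(timer);
      }
    },
  });
}
```

Pushing the timer's cleanup into `context.subscriptions` keeps disposal deterministic: deactivation cancels any pending update instead of letting it fire against torn-down state.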
## Architecture & Patterns

@@ -126,6 +133,8 @@ applyTo: '**/*.ts'
- Defer expensive work until users need it.
- Batch or debounce high-frequency events to reduce thrash.
- Track resource lifetimes to prevent leaks.
- Avoid repeated configuration reads in hot paths; cache settings when practical.
- Maintain streaming code paths without buffering entire responses; only accumulate when `stream: false`.
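One way to avoid repeated configuration reads on the hot path is to cache the parsed settings and invalidate the cache on change. This is a sketch under assumptions: the `bridge.verbose` and `bridge.maxConcurrent` keys and the `BridgeConfig` shape are illustrative, not the extension's actual configuration surface.

```ts
import * as vscode from 'vscode';

// Assumed shape of the cached settings; the real bridge config has more fields.
interface BridgeConfig {
  verbose: boolean;
  maxConcurrent: number;
}

let cached: BridgeConfig | undefined;

/** Read settings once and reuse them on the hot path. */
export function getCachedBridgeConfig(): BridgeConfig {
  if (!cached) {
    const section = vscode.workspace.getConfiguration('bridge');
    cached = {
      verbose: section.get('verbose', false),
      maxConcurrent: section.get('maxConcurrent', 4),
    };
  }
  return cached;
}

// Invalidate the cache when the user changes bridge settings.
vscode.workspace.onDidChangeConfiguration((event) => {
  if (event.affectsConfiguration('bridge')) {
    cached = undefined;
  }
});
```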
## Documentation & Comments
@@ -37,12 +37,12 @@ The server is **local only** (loopback host by default) and is not meant for mul
2. **Imports**: All imports at file top. No inline `import('module')` types.
3. **ES Module Style**: Use `import` syntax (even though `commonjs` output). No `require` in source except in isolated legacy shims (currently none).
4. **Polka Typings**: The custom declaration in `src/types/polka.d.ts` must stay minimal but strongly typed. Extend only when you need new surface.
5. **Error Handling**: Use central `onError` (`server.ts`). Avoid swallowing errors; bubble or log via `verbose`.
5. **Error Handling**: Use central `onError` (`server.ts`). Avoid swallowing errors; bubble or log via `verbose`. Prefer the pre-serialized helpers in `src/http/utils.ts` (`writeUnauthorized`, `writeNotFound`, `writeRateLimit`, `writeErrorResponse`) instead of hand-crafted JSON bodies.
6. **Logging**: Use `verbose()` for debug (guarded by config), `info()` for one‑time start messages, `error()` sparingly (currently not widely used—add only if user‑facing severity).
7. **Status Bar**: Use `updateStatus(kind)` with kinds: `start | error | success`. Initial pending state relies on `state.modelAttempted`.
8. **Model Selection**: Always feature‑detect the LM API (`hasLMApi`). Return early on missing API with clear `state.lastReason` codes.
8. **Model Selection**: Always feature-detect the LM API (`hasLMApi`). Return early on missing API with clear `state.lastReason` codes.
9. **Endpoint Stability**: Public paths (`/health`, `/v1/models`, `/v1/chat/completions`). Changes require README updates and semantic version bump.
10. **Streaming**: SSE contract: multiple `data: {chunk}` events + final `data: [DONE]`. Preserve this shape.
10. **Streaming & Tool Calling**: SSE contract: multiple `data: {chunk}` events + final `data: [DONE]`. Preserve this shape. Tool call chunks must emit `delta.tool_calls` entries encoded as JSON; arguments may arrive as incremental strings, so downstream clients should replace rather than append. The bridge treats `tool_choice: "required"` the same as `"auto"` and ignores `parallel_tool_calls` because the VS Code LM API lacks those controls—communicate this limitation in README and responses if behaviour changes in future.
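To make rule 10 concrete, here is an illustrative shape for a single streamed tool-call payload (the JSON that follows `data: ` on the wire). The id, model, and argument values are made up; only the structure reflects the contract described above.

```ts
// One illustrative SSE payload (the JSON after `data: `) for a streamed tool call delta.
// IDs, names, and arguments below are invented; the shape follows the contract in rule 10.
const toolCallChunk = {
  id: 'chatcmpl-abc123',
  object: 'chat.completion.chunk',
  created: 1733400000,
  model: 'gpt-4o',
  choices: [
    {
      index: 0,
      delta: {
        tool_calls: [
          {
            id: 'call_1',
            type: 'function',
            function: {
              name: 'get_weather',
              // Full JSON-encoded argument string; a later chunk would replace it, not extend it.
              arguments: '{"city":"Copenhagen"}',
            },
          },
        ],
      },
      finish_reason: null,
    },
  ],
};

// On the wire this is framed as `data: ${JSON.stringify(toolCallChunk)}\n\n`,
// followed eventually by a final chunk with finish_reason "tool_calls" and `data: [DONE]`.
```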
---

@@ -111,6 +111,7 @@ Avoid high‑volume logs in hot loops. Guard truly verbose details behind featur

- Concurrency limit enforced in `/v1/chat/completions` before model call; maintain early 429 path.
- Streaming is async iteration; avoid buffering entire response unless `stream: false`.
- Disable Nagle’s algorithm on streaming sockets with `socket.setNoDelay(true)` before writing SSE payloads.
- Do not introduce global locks; keep per‑request ephemeral state.

---
@@ -2,7 +2,8 @@

# Copilot Bridge (VS Code Extension)

[](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge)
[](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge)
[](https://marketplace.visualstudio.com/items?itemName=thinkability.copilot-bridge)

Expose GitHub Copilot as a local, OpenAI-compatible HTTP endpoint running inside VS Code. The bridge forwards chat requests to Copilot using the VS Code Language Model API and streams results back to you.

@@ -23,6 +24,7 @@ The extension will autostart and requires VS Code to be running.

## Changelog

- **v1.1.0** — Simplified architecture with focus on performance improvements. Copilot Bridge is now 20-30% faster doing raw inference.
- **v1.0.0** — Modular architecture refactor with service layer, OpenAI type definitions, and tool calling support
- **v0.2.2** — Polka HTTP server integration and model family selection improvements
- **v0.1.5** — Server lifecycle fixes and improved error handling
@@ -4,7 +4,7 @@
  "name": "copilot-bridge",
  "displayName": "Copilot Bridge",
  "description": "Local OpenAI-compatible chat endpoint (inference) bridging to GitHub Copilot via the VS Code Language Model API.",
  "version": "1.0.0",
  "version": "1.1.0",
  "publisher": "thinkability",
  "repository": {
    "type": "git",

@@ -22,10 +22,7 @@
    "Other"
  ],
  "activationEvents": [
    "onStartupFinished",
    "onCommand:bridge.enable",
    "onCommand:bridge.disable",
    "onCommand:bridge.status"
    "onStartupFinished"
  ],
  "main": "./out/extension.js",
  "contributes": {
@@ -1,4 +1,21 @@
import type { IncomingMessage } from 'http';

export const isAuthorized = (req: IncomingMessage, token: string): boolean =>
  !token || req.headers.authorization === `Bearer ${token}`;
// Cache the authorization header to avoid repeated concatenation
let cachedToken = '';
let cachedAuthHeader = '';

/**
 * Checks if the request is authorized against the configured token.
 * Caches the full "Bearer <token>" header to optimize hot path.
 */
export const isAuthorized = (req: IncomingMessage, token: string): boolean => {
  if (!token) return true;

  // Update cache if token changed
  if (token !== cachedToken) {
    cachedToken = token;
    cachedAuthHeader = `Bearer ${token}`;
  }

  return req.headers.authorization === cachedAuthHeader;
};
@@ -1,16 +1,29 @@
import * as vscode from 'vscode';
import type { IncomingMessage, ServerResponse } from 'http';
import { state } from '../../state';
import { isChatCompletionRequest, type ChatCompletionRequest } from '../../messages';
import { readJson, writeErrorResponse } from '../utils';
import {
  isChatCompletionRequest,
  type ChatCompletionRequest,
  normalizeMessagesLM,
  convertOpenAIToolsToLM,
  convertFunctionsToTools,
  type Tool,
} from '../../messages';
import { readJson, writeErrorResponse, writeJson } from '../utils';
import { verbose } from '../../log';
import { ModelService } from '../../services/model-service';
import { StreamingResponseHandler } from '../../services/streaming-handler';
import { processLanguageModelResponse, sendCompletionResponse } from '../../services/response-formatter';
import type { ChatCompletionContext } from '../../types/openai-types';
import { getModel, hasLMApi } from '../../models';
import { getBridgeConfig } from '../../config';
import type {
  ChatCompletionContext,
  ProcessedResponseData,
  OpenAIResponse,
  OpenAIMessage,
  OpenAIToolCall,
  OpenAIChoice,
} from '../../types/openai-types';

/**
 * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling
 * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling.
 * @param req - HTTP request object
 * @param res - HTTP response object
 */
@@ -21,46 +34,49 @@ export async function handleChatCompletion(req: IncomingMessage, res: ServerResp
  try {
    const body = await readJson(req);
    if (!isChatCompletionRequest(body)) {
      return writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload');
      writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload');
      return;
    }

    const modelService = new ModelService();

    // Validate model availability
    const modelValidation = await modelService.validateModel(body.model);
    if (!modelValidation.isValid) {
      const errorMessage = body.model ? 'model not found' : 'Copilot unavailable';
      return writeErrorResponse(
        res,
        modelValidation.statusCode!,
        errorMessage,
        modelValidation.errorType!,
        modelValidation.errorCode!,
        modelValidation.reason || 'unknown_error'
      );
    const model = await resolveModel(body.model, res);
    if (!model) {
      return;
    }

    // Create processing context
    const context = await modelService.createProcessingContext(body);
    const chatContext = modelService.createChatCompletionContext(body, context.lmTools.length > 0);

    verbose(`LM request via API model=${context.model.family || context.model.id || context.model.name || 'unknown'} tools=${context.lmTools.length}`);
    const config = getBridgeConfig();
    const mergedTools = mergeTools(body);
    const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow);
    const lmTools = convertOpenAIToolsToLM(mergedTools);
    const requestOptions: vscode.LanguageModelChatRequestOptions = lmTools.length > 0
      ? { tools: lmTools }
      : {};

    const modelName = selectResponseModelName(model, body.model);
    const chatContext = createChatCompletionContext(body, mergedTools.length > 0, modelName);
    verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'} tools=${lmTools.length}`);

    // Execute the Language Model request
    const cancellationToken = new vscode.CancellationTokenSource();
    const response = await context.model.sendRequest(
      context.lmMessages,
      context.requestOptions,
      cancellationToken.token
    );

    // Handle response based on streaming preference
    if (chatContext.isStreaming) {
      await handleStreamingResponse(res, response, chatContext, body);
    } else {
      await handleNonStreamingResponse(res, response, chatContext, body);
    try {
      const response = await model.sendRequest(
        lmMessages as vscode.LanguageModelChatMessage[],
        requestOptions,
        cancellationToken.token
      );

      try {
        if (chatContext.isStreaming) {
          await streamResponse(res, response, chatContext);
        } else {
          const processed = await collectResponseData(response);
          sendCompletionResponse(res, chatContext, processed, body);
        }
      } finally {
        disposeResponse(response);
      }
    } finally {
      cancellationToken.dispose();
    }

  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    writeErrorResponse(res, 500, errorMessage || 'internal_error', 'server_error', 'internal_error');
@@ -70,29 +86,295 @@ export async function handleChatCompletion(req: IncomingMessage, res: ServerResp
  }
}

const SSE_HEADERS = {
  'Content-Type': 'text/event-stream',
  'Cache-Control': 'no-cache',
  Connection: 'keep-alive',
} as const;

/**
 * Handles streaming response using Server-Sent Events
 * Merges tools and deprecated functions, respecting tool_choice configuration.
 * @param body - Chat completion request
 * @returns Filtered array of tools to use
 */
async function handleStreamingResponse(
  res: ServerResponse,
  response: vscode.LanguageModelChatResponse,
  chatContext: ChatCompletionContext,
  requestBody: ChatCompletionRequest
): Promise<void> {
  const streamHandler = new StreamingResponseHandler(res, chatContext, requestBody);
  streamHandler.initializeStream();
  await streamHandler.processAndStreamResponse(response);
function mergeTools(body: ChatCompletionRequest): Tool[] {
  // Early exit for disabled tools
  if (body.tool_choice === 'none' || body.function_call === 'none') {
    return [];
  }

  const baseTools = body.tools ?? [];
  const functionTools = convertFunctionsToTools(body.functions);
  const combined = functionTools.length > 0 ? [...baseTools, ...functionTools] : baseTools;

  // Handle specific tool selection
  if (
    body.tool_choice &&
    typeof body.tool_choice === 'object' &&
    'type' in body.tool_choice &&
    body.tool_choice.type === 'function' &&
    'function' in body.tool_choice &&
    body.tool_choice.function &&
    typeof body.tool_choice.function === 'object' &&
    'name' in body.tool_choice.function
  ) {
    const fnName = body.tool_choice.function.name;
    if (typeof fnName === 'string') {
      return combined.filter((tool) => tool.function.name === fnName);
    }
  }

  return combined;
}

async function resolveModel(
  requestedModel: string | undefined,
  res: ServerResponse
): Promise<vscode.LanguageModelChat | undefined> {
  const model = await getModel(false, requestedModel);
  if (model) {
    return model;
  }

  const hasLanguageModels = hasLMApi();
  if (requestedModel && hasLanguageModels) {
    writeErrorResponse(res, 404, 'model not found', 'invalid_request_error', 'model_not_found', 'not_found');
  } else {
    const reason = hasLanguageModels ? 'copilot_model_unavailable' : 'missing_language_model_api';
    writeErrorResponse(res, 503, 'Copilot unavailable', 'server_error', 'copilot_unavailable', reason);
  }
  return undefined;
}

function createChatCompletionContext(
  body: ChatCompletionRequest,
  hasTools: boolean,
  modelName: string
): ChatCompletionContext {
  return {
    requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`,
    modelName,
    created: Math.floor(Date.now() / 1000),
    hasTools,
    isStreaming: body.stream === true,
  };
}

/**
 * Handles non-streaming response with complete data
 * Streams chat completion response using Server-Sent Events.
 * @param res - HTTP response object
 * @param response - VS Code Language Model response
 * @param context - Chat completion context
 */
async function handleNonStreamingResponse(
async function streamResponse(
  res: ServerResponse,
  response: vscode.LanguageModelChatResponse,
  chatContext: ChatCompletionContext,
  requestBody: ChatCompletionRequest
  context: ChatCompletionContext
): Promise<void> {
  const processedData = await processLanguageModelResponse(response);
  sendCompletionResponse(res, chatContext, processedData, requestBody);
  // Disable Nagle's algorithm for lower latency streaming
  if (res.socket) {
    res.socket.setNoDelay(true);
  }

  res.writeHead(200, SSE_HEADERS);
  if (typeof res.flushHeaders === 'function') {
    res.flushHeaders();
  }
  verbose(`SSE start id=${context.requestId}`);

  let sawToolCall = false;
  let sentRoleChunk = false;

  for await (const part of response.stream) {
    // Send initial role chunk once
    if (!sentRoleChunk) {
      writeSseData(res, createChunkResponse(context, { role: 'assistant' }, null));
      sentRoleChunk = true;
    }

    if (isToolCallPart(part)) {
      sawToolCall = true;
      writeSseData(res, createChunkResponse(context, {
        tool_calls: [createToolCall(part)],
      }, null));
    } else {
      const content = extractTextContent(part);
      if (content) {
        writeSseData(res, createChunkResponse(context, { content }, null));
      }
    }
  }

  // Ensure role chunk is sent even for empty responses
  if (!sentRoleChunk) {
    writeSseData(res, createChunkResponse(context, { role: 'assistant' }, null));
  }

  const finalChunk = createChunkResponse(context, {}, sawToolCall ? 'tool_calls' : 'stop');
  writeSseData(res, finalChunk);
  res.write('data: [DONE]\n\n');
  res.end();
  verbose(`SSE end id=${context.requestId}`);
}

/**
 * Collects complete response data from VS Code Language Model stream.
 * @param response - VS Code Language Model response
 * @returns Processed response data with content and tool calls
 */
async function collectResponseData(
  response: vscode.LanguageModelChatResponse
): Promise<ProcessedResponseData> {
  let content = '';
  const toolCalls: OpenAIToolCall[] = [];

  for await (const part of response.stream) {
    if (isToolCallPart(part)) {
      toolCalls.push(createToolCall(part));
    } else {
      content += extractTextContent(part);
    }
  }

  const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop';
  return { content, toolCalls, finishReason };
}

function sendCompletionResponse(
  res: ServerResponse,
  context: ChatCompletionContext,
  data: ProcessedResponseData,
  requestBody?: ChatCompletionRequest
): void {
  const message = createOpenAIMessage(data, requestBody);
  const response: OpenAIResponse = {
    id: context.requestId,
    object: 'chat.completion',
    created: context.created,
    model: context.modelName,
    choices: [
      {
        index: 0,
        message,
        finish_reason: data.finishReason,
      },
    ],
    usage: {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0,
    },
  };

  verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`);
  writeJson(res, 200, response);
}

function createOpenAIMessage(
  data: ProcessedResponseData,
  requestBody?: ChatCompletionRequest
): OpenAIMessage {
  const base: OpenAIMessage = {
    role: 'assistant',
    content: data.toolCalls.length > 0 ? null : data.content,
  };

  if (data.toolCalls.length === 0) {
    return base;
  }

  const withTools: OpenAIMessage = {
    ...base,
    tool_calls: data.toolCalls,
  };

  if (data.toolCalls.length === 1 && requestBody?.function_call !== undefined) {
    return {
      ...withTools,
      function_call: {
        name: data.toolCalls[0].function.name,
        arguments: data.toolCalls[0].function.arguments,
      },
    };
  }

  return withTools;
}

function createChunkResponse(
  context: ChatCompletionContext,
  delta: Partial<OpenAIMessage>,
  finishReason: OpenAIChoice['finish_reason'] | null
): OpenAIResponse {
  return {
    id: context.requestId,
    object: 'chat.completion.chunk',
    created: context.created,
    model: context.modelName,
    choices: [
      {
        index: 0,
        delta,
        finish_reason: finishReason,
      },
    ],
  };
}

function writeSseData(res: ServerResponse, data: OpenAIResponse): void {
  res.write(`data: ${JSON.stringify(data)}\n\n`);
}

function createToolCall(part: vscode.LanguageModelToolCallPart): OpenAIToolCall {
  return {
    id: part.callId,
    type: 'function',
    function: {
      name: part.name,
      arguments: JSON.stringify(part.input),
    },
  };
}

function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart {
  return (
    part !== null &&
    typeof part === 'object' &&
    'callId' in part &&
    'name' in part &&
    'input' in part
  );
}

function extractTextContent(part: unknown): string {
  if (typeof part === 'string') {
    return part;
  }

  if (part !== null && typeof part === 'object' && 'value' in part) {
    return String((part as { value: unknown }).value) || '';
  }

  return '';
}

function disposeResponse(response: vscode.LanguageModelChatResponse): void {
  const disposable = response as { dispose?: () => void };
  if (typeof disposable.dispose === 'function') {
    disposable.dispose();
  }
}

/**
 * Selects the most appropriate model name for the response.
 * Prioritizes requested model, then model ID, family, name, and finally defaults to 'copilot'.
 * @param model - VS Code Language Model instance
 * @param requestedModel - Model name from the request
 * @returns Model name to use in response
 */
function selectResponseModelName(
  model: vscode.LanguageModelChat,
  requestedModel: string | undefined
): string {
  return requestedModel ?? model.id ?? model.family ?? model.name ?? 'copilot';
}
@@ -6,7 +6,7 @@ import { isAuthorized } from './auth';
import { handleHealthCheck } from './routes/health';
import { handleModelsRequest } from './routes/models';
import { handleChatCompletion } from './routes/chat';
import { writeErrorResponse } from './utils';
import { writeErrorResponse, writeNotFound, writeRateLimit, writeUnauthorized } from './utils';
import { ensureOutput, verbose } from '../log';
import { updateStatus } from '../status';

@@ -26,20 +26,32 @@ export const startServer = async (): Promise<void> => {
      }
    },
    onNoMatch: (_req, res) => {
      writeErrorResponse(res, 404, 'not found', 'invalid_request_error', 'route_not_found');
      writeNotFound(res);
    },
  });

  // Logging + auth middleware
  app.use((req: IncomingMessage & { method?: string; url?: string }, res: ServerResponse, next: () => void) => {
    verbose(`HTTP ${req.method} ${req.url}`);
  // Auth middleware - runs before all routes (except /health)
  app.use((req, res, next) => {
    const path = req.url ?? '/';
    if (path === '/health') {
      return next();
    }
    if (!isAuthorized(req, config.token)) {
      writeErrorResponse(res, 401, 'unauthorized', 'invalid_request_error', 'unauthorized');
      writeUnauthorized(res);
      return;
    }
    next();
  });

  // Verbose logging middleware
  const cfg = getBridgeConfig();
  if (cfg.verbose) {
    app.use((req, res, next) => {
      verbose(`${req.method} ${req.url}`);
      next();
    });
  }

  app.get('/health', async (_req: IncomingMessage, res: ServerResponse) => {
    await handleHealthCheck(res, config.verbose);
  });

@@ -49,18 +61,15 @@ export const startServer = async (): Promise<void> => {
  });

  app.post('/v1/chat/completions', async (req: IncomingMessage, res: ServerResponse) => {
    // Rate limiting check
    if (state.activeRequests >= config.maxConcurrent) {
      res.writeHead(429, { 'Content-Type': 'application/json', 'Retry-After': '1' });
      res.end(JSON.stringify({
        error: {
          message: 'too many requests',
          type: 'rate_limit_error',
          code: 'rate_limit_exceeded',
        },
      }));
      verbose(`429 throttled (active=${state.activeRequests}, max=${config.maxConcurrent})`);
      if (config.verbose) {
        verbose(`429 throttled (active=${state.activeRequests}, max=${config.maxConcurrent})`);
      }
      writeRateLimit(res);
      return;
    }

    try {
      await handleChatCompletion(req, res);
    } catch (e) {
@@ -9,8 +9,64 @@ export interface ErrorResponse {
  };
}

// Pre-serialized common error responses for hot paths
const UNAUTHORIZED_ERROR = JSON.stringify({
  error: {
    message: 'unauthorized',
    type: 'invalid_request_error',
    code: 'unauthorized',
  },
});

const NOT_FOUND_ERROR = JSON.stringify({
  error: {
    message: 'not found',
    type: 'invalid_request_error',
    code: 'route_not_found',
  },
});

const RATE_LIMIT_ERROR = JSON.stringify({
  error: {
    message: 'too many requests',
    type: 'rate_limit_error',
    code: 'rate_limit_exceeded',
  },
});

// Reusable header objects
const JSON_HEADERS = { 'Content-Type': 'application/json' } as const;
const RATE_LIMIT_HEADERS = {
  'Content-Type': 'application/json',
  'Retry-After': '1',
} as const;

/**
 * Fast-path unauthorized response (pre-serialized).
 */
export const writeUnauthorized = (res: ServerResponse): void => {
  res.writeHead(401, JSON_HEADERS);
  res.end(UNAUTHORIZED_ERROR);
};

/**
 * Fast-path not found response (pre-serialized).
 */
export const writeNotFound = (res: ServerResponse): void => {
  res.writeHead(404, JSON_HEADERS);
  res.end(NOT_FOUND_ERROR);
};

/**
 * Fast-path rate limit response (pre-serialized).
 */
export const writeRateLimit = (res: ServerResponse): void => {
  res.writeHead(429, RATE_LIMIT_HEADERS);
  res.end(RATE_LIMIT_ERROR);
};

export const writeJson = <T>(res: ServerResponse, status: number, body: T): void => {
  res.writeHead(status, { 'Content-Type': 'application/json' });
  res.writeHead(status, JSON_HEADERS);
  res.end(JSON.stringify(body));
};
@@ -152,8 +152,9 @@ export const normalizeMessagesLM = (
  ).slice(-histWindow * 3); // Increased window to account for tool messages

  const lmMsg = (vscode as unknown as { LanguageModelChatMessage?: typeof vscode.LanguageModelChatMessage }).LanguageModelChatMessage;
  const UserFactory = lmMsg?.User;
  const AssistantFactory = lmMsg?.Assistant;
  const userFactory = lmMsg?.User;
  const assistantFactory = lmMsg?.Assistant;
  const hasFactories = Boolean(userFactory && assistantFactory);

  const result: (vscode.LanguageModelChatMessage | { role: 'user' | 'assistant'; content: string })[] = [];
  let firstUserSeen = false;

@@ -165,7 +166,7 @@ export const normalizeMessagesLM = (
        text = `[SYSTEM]\n${toText(systemMessage.content)}\n\n[DIALOG]\nuser: ${text}`;
        firstUserSeen = true;
      }
      result.push(UserFactory ? UserFactory(text) : { role: 'user', content: text });
      result.push(userFactory ? userFactory(text) : { role: 'user', content: text });
    } else if (m.role === 'assistant') {
      // For assistant messages, we need to handle both content and tool calls
      let text = '';

@@ -192,20 +193,20 @@ export const normalizeMessagesLM = (
        text = `[FUNCTION_CALL] ${m.function_call.name}(${m.function_call.arguments})`;
      }

      result.push(AssistantFactory ? AssistantFactory(text) : { role: 'assistant', content: text });
      result.push(assistantFactory ? assistantFactory(text) : { role: 'assistant', content: text });
    } else if (m.role === 'tool') {
      // Tool messages should be converted to user messages with tool result context
      const toolResult = `[TOOL_RESULT:${m.tool_call_id}] ${toText(m.content)}`;
      result.push(UserFactory ? UserFactory(toolResult) : { role: 'user', content: toolResult });
      result.push(userFactory ? userFactory(toolResult) : { role: 'user', content: toolResult });
    }
  }

  if (!firstUserSeen && systemMessage) {
    const text = `[SYSTEM]\n${toText(systemMessage.content)}`;
    result.unshift(UserFactory ? UserFactory(text) : { role: 'user', content: text });
    result.unshift(userFactory ? userFactory(text) : { role: 'user', content: text });
  }

  if (result.length === 0) result.push(UserFactory ? UserFactory('') : { role: 'user', content: '' });
  if (result.length === 0) result.push(userFactory ? userFactory('') : { role: 'user', content: '' });

  return result;
};
@@ -1,99 +0,0 @@
import type * as vscode from 'vscode';
import type { ChatCompletionRequest } from '../messages';
import type {
  ModelValidationResult,
  RequestProcessingContext,
  ChatCompletionContext
} from '../types/openai-types';
import {
  extractAndMergeTools,
  createLanguageModelRequestOptions
} from './request-processor';
import { getModel, hasLMApi } from '../models';
import { normalizeMessagesLM, convertOpenAIToolsToLM } from '../messages';
import { getBridgeConfig } from '../config';

/**
 * Service for validating models and creating request processing context
 */
export class ModelService {

  /**
   * Validates the requested model and returns appropriate error details if invalid
   * @param requestedModel - The model identifier from the request
   * @returns Validation result with error details if model is unavailable
   */
  public async validateModel(requestedModel?: string): Promise<ModelValidationResult> {
    const model = await getModel(false, requestedModel);

    if (!model) {
      const hasLM = hasLMApi();

      if (requestedModel && hasLM) {
        return {
          isValid: false,
          statusCode: 404,
          errorType: 'invalid_request_error',
          errorCode: 'model_not_found',
          reason: 'not_found'
        };
      }

      const reason = !hasLM ? 'missing_language_model_api' : 'copilot_model_unavailable';
      return {
        isValid: false,
        statusCode: 503,
        errorType: 'server_error',
        errorCode: 'copilot_unavailable',
        reason
      };
    }

    return { isValid: true };
  }

  /**
   * Creates a complete request processing context from validated inputs
   * @param body - The validated chat completion request
   * @returns Processing context with all required elements for the Language Model API
   */
  public async createProcessingContext(body: ChatCompletionRequest): Promise<RequestProcessingContext> {
    const model = await getModel(false, body.model);
    if (!model) {
      throw new Error('Model validation should be performed before creating processing context');
    }

    const config = getBridgeConfig();
    const mergedTools = extractAndMergeTools(body);
    const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow);
    const lmTools = convertOpenAIToolsToLM(mergedTools);
    const requestOptions = createLanguageModelRequestOptions(lmTools);

    return {
      model,
      lmMessages: lmMessages as vscode.LanguageModelChatMessage[],
      lmTools,
      requestOptions,
      mergedTools
    };
  }

  /**
   * Creates chat completion context for response formatting
   * @param body - The chat completion request
   * @param hasTools - Whether tools are present in the request
   * @returns Context object for response handling
   */
  public createChatCompletionContext(
    body: ChatCompletionRequest,
    hasTools: boolean
  ): ChatCompletionContext {
    return {
      requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`,
      modelName: body.model || 'copilot',
      created: Math.floor(Date.now() / 1000),
      hasTools,
      isStreaming: body.stream !== false
    };
  }
}
@@ -1,39 +0,0 @@
import type { ChatCompletionRequest, Tool } from '../messages';
import type * as vscode from 'vscode';

/**
 * Validates and extracts tool configurations from request body
 * @param body - The parsed request body
 * @returns Combined tools array including converted deprecated functions
 */
export function extractAndMergeTools(body: ChatCompletionRequest): Tool[] {
  const tools = body.tools || [];

  if (body.functions) {
    // Convert deprecated functions to tools format
    const convertedTools: Tool[] = body.functions.map(func => ({
      type: 'function' as const,
      function: func
    }));
    return [...tools, ...convertedTools];
  }

  return tools;
}

/**
 * Creates VS Code Language Model request options from processed context
 * @param lmTools - Array of Language Model compatible tools
 * @returns Request options object for the Language Model API
 */
export function createLanguageModelRequestOptions(
  lmTools: vscode.LanguageModelChatTool[]
): vscode.LanguageModelChatRequestOptions {
  const options: vscode.LanguageModelChatRequestOptions = {};

  if (lmTools.length > 0) {
    options.tools = lmTools;
  }

  return options;
}
@@ -1,158 +0,0 @@
import type * as vscode from 'vscode';
import type { ServerResponse } from 'http';
import type {
  OpenAIResponse,
  OpenAIChoice,
  OpenAIMessage,
  OpenAIToolCall,
  ChatCompletionContext,
  ProcessedResponseData
} from '../types/openai-types';
import type { ChatCompletionRequest } from '../messages';
import { writeJson } from '../http/utils';
import { verbose } from '../log';

/**
 * Processes VS Code Language Model stream parts into structured data
 * @param response - The VS Code Language Model chat response
 * @returns Promise resolving to processed content and tool calls
 */
export async function processLanguageModelResponse(
  response: vscode.LanguageModelChatResponse
): Promise<ProcessedResponseData> {
  let content = '';
  const toolCalls: OpenAIToolCall[] = [];

  for await (const part of response.stream) {
    if (isToolCallPart(part)) {
      const toolCall: OpenAIToolCall = {
        id: part.callId,
        type: 'function',
        function: {
          name: part.name,
          arguments: JSON.stringify(part.input)
        }
      };
      toolCalls.push(toolCall);
    } else if (isTextPart(part)) {
      content += extractTextContent(part);
    }
  }

  const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop';

  return {
    content,
    toolCalls,
    finishReason
  };
}

/**
 * Creates an OpenAI-compatible response message
 * @param data - The processed response data
 * @param requestBody - Original request body for backward compatibility
 * @returns OpenAI message object
 */
export function createOpenAIMessage(
  data: ProcessedResponseData,
  requestBody?: ChatCompletionRequest
): OpenAIMessage {
  const baseMessage = {
    role: 'assistant' as const,
    content: data.toolCalls.length > 0 ? null : data.content,
  };

  // Add tool_calls if present
  if (data.toolCalls.length > 0) {
    const messageWithTools = {
      ...baseMessage,
      tool_calls: data.toolCalls,
    };

    // For backward compatibility, also add function_call if there's exactly one tool call
    if (data.toolCalls.length === 1 && requestBody?.function_call !== undefined) {
      return {
        ...messageWithTools,
        function_call: {
          name: data.toolCalls[0].function.name,
          arguments: data.toolCalls[0].function.arguments
        }
      };
    }

    return messageWithTools;
  }

  return baseMessage;
}

/**
 * Sends a complete (non-streaming) OpenAI-compatible response
 * @param res - HTTP response object
 * @param context - Chat completion context
 * @param data - Processed response data
 * @param requestBody - Original request body
 */
export function sendCompletionResponse(
  res: ServerResponse,
  context: ChatCompletionContext,
  data: ProcessedResponseData,
  requestBody?: ChatCompletionRequest
): void {
  const message = createOpenAIMessage(data, requestBody);

  const responseObj: OpenAIResponse = {
    id: context.requestId,
    object: 'chat.completion',
    created: context.created,
    model: context.modelName,
    choices: [{
      index: 0,
      message,
      finish_reason: data.finishReason,
    }],
    usage: {
      prompt_tokens: 0, // VS Code API doesn't provide token counts
      completion_tokens: 0,
      total_tokens: 0
    }
  };

  verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`);
  writeJson(res, 200, responseObj);
}

/**
 * Type guard for VS Code LanguageModelToolCallPart
 */
function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart {
  return part !== null &&
    typeof part === 'object' &&
    'callId' in part &&
    'name' in part &&
    'input' in part;
}

/**
 * Type guard for text content parts
 */
function isTextPart(part: unknown): boolean {
  return typeof part === 'string' ||
    (part !== null && typeof part === 'object' && 'value' in part);
}

/**
 * Extracts text content from various part types
 */
function extractTextContent(part: unknown): string {
  if (typeof part === 'string') {
    return part;
  }

  if (part !== null && typeof part === 'object' && 'value' in part) {
    return String((part as { value: unknown }).value) || '';
  }

  return '';
}
@@ -1,190 +0,0 @@
import type * as vscode from 'vscode';
import type { ServerResponse } from 'http';
import type {
  OpenAIResponse,
  OpenAIToolCall,
  ChatCompletionContext
} from '../types/openai-types';
import type { ChatCompletionRequest } from '../messages';
import { verbose } from '../log';

/**
 * Handles Server-Sent Events streaming for OpenAI-compatible chat completions
 */
export class StreamingResponseHandler {
  private readonly response: ServerResponse;
  private readonly context: ChatCompletionContext;
  private readonly requestBody?: ChatCompletionRequest;

  constructor(
    response: ServerResponse,
    context: ChatCompletionContext,
    requestBody?: ChatCompletionRequest
  ) {
    this.response = response;
    this.context = context;
    this.requestBody = requestBody;
  }

  /**
   * Initializes the SSE stream with proper headers
   */
  public initializeStream(): void {
    this.response.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    });

    verbose(`SSE start id=${this.context.requestId}`);
  }

  /**
   * Processes the Language Model response stream and sends SSE chunks
   * @param languageModelResponse - VS Code Language Model response
   */
  public async processAndStreamResponse(
    languageModelResponse: vscode.LanguageModelChatResponse
  ): Promise<void> {
    const toolCalls: OpenAIToolCall[] = [];

    for await (const part of languageModelResponse.stream) {
      if (this.isToolCallPart(part)) {
        const toolCall = this.createToolCallFromPart(part);
        toolCalls.push(toolCall);
        this.sendToolCallChunk(toolCall);
      } else if (this.isTextPart(part)) {
        const content = this.extractTextContent(part);
        if (content) {
          this.sendContentChunk(content);
        }
      }
    }

    this.sendFinalChunk(toolCalls.length > 0 ? 'tool_calls' : 'stop');
    this.endStream();
  }

  /**
   * Sends a content delta chunk
   */
  private sendContentChunk(content: string): void {
    const chunkResponse: OpenAIResponse = {
      id: this.context.requestId,
      object: 'chat.completion.chunk',
      created: this.context.created,
      model: this.context.modelName,
      choices: [{
        index: 0,
        delta: { content },
        finish_reason: null
      }]
    };

    this.writeSSEData(chunkResponse);
  }

  /**
   * Sends a tool call chunk
   */
  private sendToolCallChunk(toolCall: OpenAIToolCall): void {
    const chunkResponse: OpenAIResponse = {
      id: this.context.requestId,
      object: 'chat.completion.chunk',
      created: this.context.created,
      model: this.context.modelName,
      choices: [{
        index: 0,
        delta: {
          tool_calls: [toolCall]
        },
        finish_reason: null
      }]
    };

    this.writeSSEData(chunkResponse);
  }

  /**
   * Sends the final completion chunk with finish reason
   */
  private sendFinalChunk(finishReason: 'stop' | 'tool_calls'): void {
    const finalChunkResponse: OpenAIResponse = {
      id: this.context.requestId,
      object: 'chat.completion.chunk',
      created: this.context.created,
      model: this.context.modelName,
      choices: [{
        index: 0,
        delta: {},
        finish_reason: finishReason
      }]
    };

    this.writeSSEData(finalChunkResponse);
  }

  /**
   * Ends the SSE stream
   */
  private endStream(): void {
    verbose(`SSE end id=${this.context.requestId}`);
    this.response.write('data: [DONE]\n\n');
    this.response.end();
  }

  /**
   * Writes data to the SSE stream
   */
  private writeSSEData(data: OpenAIResponse): void {
    this.response.write(`data: ${JSON.stringify(data)}\n\n`);
  }

  /**
   * Creates an OpenAI tool call from VS Code Language Model part
   */
  private createToolCallFromPart(part: vscode.LanguageModelToolCallPart): OpenAIToolCall {
    return {
      id: part.callId,
      type: 'function',
      function: {
        name: part.name,
        arguments: JSON.stringify(part.input)
      }
    };
  }

  /**
   * Type guard for VS Code LanguageModelToolCallPart
   */
  private isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart {
    return part !== null &&
      typeof part === 'object' &&
      'callId' in part &&
      'name' in part &&
      'input' in part;
  }

  /**
   * Type guard for text content parts
   */
  private isTextPart(part: unknown): boolean {
    return typeof part === 'string' ||
      (part !== null && typeof part === 'object' && 'value' in part);
  }

  /**
   * Extracts text content from various part types
   */
  private extractTextContent(part: unknown): string {
    if (typeof part === 'string') {
      return part;
    }

    if (part !== null && typeof part === 'object' && 'value' in part) {
      return String((part as { value: unknown }).value) || '';
    }

    return '';
  }
}
@@ -23,9 +23,9 @@ export const updateStatus = (kind: BridgeStatusKind): void => {

  switch (kind) {
    case 'start': {
      const availability = state.modelCache ? 'OK' : (state.modelAttempted ? 'Unavailable' : 'Pending');
      state.statusBarItem.text = `Copilot Bridge: ${availability} @ ${shown}`;
      info(`Started at http://${shown} | Copilot: ${state.modelCache ? 'ok' : (state.modelAttempted ? 'unavailable' : 'pending')}`);
      const availability = state.modelCache ? 'OK' : (state.modelAttempted ? 'Unavailable' : 'Pending');
      state.statusBarItem.text = `Copilot Bridge: ${availability} @ ${shown}`;
      info(`Started at http://${shown} | Copilot: ${state.modelCache ? 'ok' : (state.modelAttempted ? 'unavailable' : 'pending')}`);
      break;
    }
    case 'error':
@@ -1,6 +1,3 @@
import type * as vscode from 'vscode';
import type { Tool } from '../messages';

/**
 * OpenAI API compatible types for request and response handling
 */
@@ -56,26 +53,4 @@ export interface ProcessedResponseData {
  readonly content: string;
  readonly toolCalls: OpenAIToolCall[];
  readonly finishReason: OpenAIChoice['finish_reason'];
}

/**
 * Validates that the request model is available and properly configured
 */
export interface ModelValidationResult {
  readonly isValid: boolean;
  readonly statusCode?: number;
  readonly errorType?: string;
  readonly errorCode?: string;
  readonly reason?: string;
}

/**
 * Consolidated request processing context for chat completions
 */
export interface RequestProcessingContext {
  readonly model: vscode.LanguageModelChat;
  readonly lmMessages: vscode.LanguageModelChatMessage[];
  readonly lmTools: vscode.LanguageModelChatTool[];
  readonly requestOptions: vscode.LanguageModelChatRequestOptions;
  readonly mergedTools: Tool[];
}