From e4a24785bbcd44d4e8b1baaf6eb859328bba4dda Mon Sep 17 00:00:00 2001
From: Lars Baunwall <larslb@thinkability.dk>
Date: Mon, 29 Sep 2025 17:59:38 +0200
Subject: [PATCH 1/3] Add OpenAI-compatible tool and function calling support

---
 src/http/routes/chat.ts   | 225 +++++++++++++++++++++++++++++++++-----
 src/http/routes/health.ts |  11 ++
 src/http/routes/models.ts |  30 +++--
 src/messages.ts           |  98 ++++++++++++++++-
 4 files changed, 324 insertions(+), 40 deletions(-)
diff --git a/src/http/routes/chat.ts b/src/http/routes/chat.ts
index c1dae65..190f7ad 100644
--- a/src/http/routes/chat.ts
+++ b/src/http/routes/chat.ts
@@ -2,11 +2,51 @@ import * as vscode from 'vscode';
 import type { IncomingMessage, ServerResponse } from 'http';
 import { state } from '../../state';
 import { getBridgeConfig } from '../../config';
-import { isChatCompletionRequest, normalizeMessagesLM } from '../../messages';
+import { isChatCompletionRequest, normalizeMessagesLM, convertOpenAIToolsToLM, convertFunctionsToTools } from '../../messages';
 import { getModel, hasLMApi } from '../../models';
 import { readJson, writeErrorResponse, writeJson } from '../utils';
 import { verbose } from '../../log';
 
+// OpenAI response interfaces for better typing
+interface OpenAIToolCall {
+  id: string;
+  type: 'function';
+  function: {
+    name: string;
+    arguments: string;
+  };
+}
+
+interface OpenAIMessage {
+  role: 'assistant';
+  content: string | null;
+  tool_calls?: OpenAIToolCall[];
+  function_call?: {
+    name: string;
+    arguments: string;
+  };
+}
+
+interface OpenAIChoice {
+  index: number;
+  message?: OpenAIMessage;
+  delta?: Partial<OpenAIMessage>;
+  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
+}
+
+interface OpenAIResponse {
+  id: string;
+  object: 'chat.completion' | 'chat.completion.chunk';
+  created: number;
+  model: string;
+  choices: OpenAIChoice[];
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
 export const handleChatCompletion = async (req: IncomingMessage, res: ServerResponse): Promise<void> => {
   const config = getBridgeConfig();
   state.activeRequests++;
@@ -20,6 +60,14 @@ export const handleChatCompletion = async (req: IncomingMessage, res: ServerResp
 
     const requestedModel = body.model;
     const stream = body.stream !== false; // default true
+    
+    // Handle tools and deprecated functions
+    let tools = body.tools || [];
+    if (body.functions) {
+      // Convert deprecated functions to tools format
+      tools = [...tools, ...convertFunctionsToTools(body.functions)];
+    }
+    
     const model = await getModel(false, requestedModel);
 
     if (!model) {
@@ -33,11 +81,19 @@ export const handleChatCompletion = async (req: IncomingMessage, res: ServerResp
     }
 
     const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow) as vscode.LanguageModelChatMessage[];
-    verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'}`);
+    const lmTools = convertOpenAIToolsToLM(tools);
+    
+    // Prepare request options for Language Model API
+    const requestOptions: any = {};
+    if (lmTools.length > 0) {
+      requestOptions.tools = lmTools;
+    }
+    
+    verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'} tools=${lmTools.length}`);
 
     const cts = new vscode.CancellationTokenSource();
-    const response = await model.sendRequest(lmMessages, {}, cts.token);
-    await sendResponse(res, response, stream);
+    const response = await model.sendRequest(lmMessages, requestOptions, cts.token);
+    await sendResponse(res, response, stream, body, tools);
   } catch (e) {
     const msg = e instanceof Error ? e.message : String(e);
     writeErrorResponse(res, 500, msg || 'internal_error', 'server_error', 'internal_error');
@@ -47,40 +103,157 @@ export const handleChatCompletion = async (req: IncomingMessage, res: ServerResp
   }
 };
 
-const sendResponse = async (res: ServerResponse, response: vscode.LanguageModelChatResponse, stream: boolean): Promise<void> => {
+const sendResponse = async (
+  res: ServerResponse, 
+  response: vscode.LanguageModelChatResponse, 
+  stream: boolean,
+  requestBody?: any,
+  tools?: any[]
+): Promise<void> => {
+  const modelName = requestBody?.model || 'copilot';
+  const responseId = `chatcmpl-${Math.random().toString(36).slice(2)}`;
+  const created = Math.floor(Date.now() / 1000);
+
   if (stream) {
     res.writeHead(200, {
       'Content-Type': 'text/event-stream',
       'Cache-Control': 'no-cache',
       'Connection': 'keep-alive',
     });
-    const id = `cmp_${Math.random().toString(36).slice(2)}`;
-    verbose(`SSE start id=${id}`);
-    for await (const fragment of response.text) {
-      res.write(`data: ${JSON.stringify({
-        id,
-        object: 'chat.completion.chunk',
-        choices: [{ index: 0, delta: { content: fragment } }],
-      })}\n\n`);
+    
+    verbose(`SSE start id=${responseId}`);
+    
+    let toolCalls: OpenAIToolCall[] = [];
+    
+    for await (const part of response.stream) {
+      // Check if this part is a LanguageModelToolCallPart
+      if (part && typeof part === 'object' && 'callId' in part && 'name' in part && 'input' in part) {
+        const toolCallPart = part as vscode.LanguageModelToolCallPart;
+        const toolCall: OpenAIToolCall = {
+          id: toolCallPart.callId,
+          type: 'function',
+          function: {
+            name: toolCallPart.name,
+            arguments: JSON.stringify(toolCallPart.input)
+          }
+        };
+        toolCalls.push(toolCall);
+        
+        // Send tool call in streaming format
+        const chunkResponse: OpenAIResponse = {
+          id: responseId,
+          object: 'chat.completion.chunk',
+          created,
+          model: modelName,
+          choices: [{
+            index: 0,
+            delta: {
+              tool_calls: [toolCall]
+            },
+            finish_reason: null
+          }]
+        };
+        res.write(`data: ${JSON.stringify(chunkResponse)}\n\n`);
+      } else if (typeof part === 'string' || (part && typeof part === 'object' && 'value' in part)) {
+        // Handle text content
+        const content = typeof part === 'string' ? part : (part as any).value || '';
+        if (content) {
+          const chunkResponse: OpenAIResponse = {
+            id: responseId,
+            object: 'chat.completion.chunk',
+            created,
+            model: modelName,
+            choices: [{
+              index: 0,
+              delta: { content },
+              finish_reason: null
+            }]
+          };
+          res.write(`data: ${JSON.stringify(chunkResponse)}\n\n`);
+        }
+      }
     }
-    verbose(`SSE end id=${id}`);
+    
+    // Send final chunk
+    const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop';
+    const finalChunkResponse: OpenAIResponse = {
+      id: responseId,
+      object: 'chat.completion.chunk',
+      created,
+      model: modelName,
+      choices: [{
+        index: 0,
+        delta: {},
+        finish_reason: finishReason
+      }]
+    };
+    res.write(`data: ${JSON.stringify(finalChunkResponse)}\n\n`);
+    
+    verbose(`SSE end id=${responseId}`);
     res.write('data: [DONE]\n\n');
     res.end();
     return;
   }
 
+  // Non-streaming response
   let content = '';
-  for await (const fragment of response.text) content += fragment;
-  verbose(`Non-stream complete len=${content.length}`);
-  writeJson(res, 200, {
-    id: `cmpl_${Math.random().toString(36).slice(2)}`,
+  let toolCalls: OpenAIToolCall[] = [];
+  
+  for await (const part of response.stream) {
+    if (part && typeof part === 'object' && 'callId' in part && 'name' in part && 'input' in part) {
+      // Handle VS Code LanguageModelToolCallPart
+      const toolCallPart = part as vscode.LanguageModelToolCallPart;
+      const toolCall: OpenAIToolCall = {
+        id: toolCallPart.callId,
+        type: 'function',
+        function: {
+          name: toolCallPart.name,
+          arguments: JSON.stringify(toolCallPart.input)
+        }
+      };
+      toolCalls.push(toolCall);
+    } else if (typeof part === 'string' || (part && typeof part === 'object' && 'value' in part)) {
+      // Handle text content
+      content += typeof part === 'string' ? part : (part as any).value || '';
+    }
+  }
+  
+  verbose(`Non-stream complete len=${content.length} tool_calls=${toolCalls.length}`);
+  
+  const message: OpenAIMessage = {
+    role: 'assistant',
+    content: toolCalls.length > 0 ? null : content,
+  };
+  
+  // Add tool_calls if present
+  if (toolCalls.length > 0) {
+    message.tool_calls = toolCalls;
+    
+    // For backward compatibility, also add function_call if there's exactly one tool call
+    if (toolCalls.length === 1 && requestBody?.function_call !== undefined) {
+      message.function_call = {
+        name: toolCalls[0].function.name,
+        arguments: toolCalls[0].function.arguments
+      };
+    }
+  }
+  
+  const responseObj: OpenAIResponse = {
+    id: responseId,
     object: 'chat.completion',
-    choices: [
-      {
-        index: 0,
-        message: { role: 'assistant', content },
-        finish_reason: 'stop',
-      },
-    ],
-  });
+    created,
+    model: modelName,
+    choices: [{
+      index: 0,
+      message,
+      finish_reason: toolCalls.length > 0 ? 'tool_calls' : 'stop',
+    }],
+    usage: {
+      prompt_tokens: 0, // VS Code API doesn't provide token counts
+      completion_tokens: 0,
+      total_tokens: 0
+    }
+  };
+  
+  writeJson(res, 200, responseObj);
 };
diff --git a/src/http/routes/health.ts b/src/http/routes/health.ts
index 87eb9a8..23c4874 100644
--- a/src/http/routes/health.ts
+++ b/src/http/routes/health.ts
@@ -14,10 +14,21 @@ export const handleHealthCheck = async (res: ServerResponse, v: boolean): Promis
   const unavailableReason = state.modelCache
     ? undefined
     : (!hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable'));
+  
   writeJson(res, 200, {
     ok: true,
+    status: 'operational',
     copilot: state.modelCache ? 'ok' : 'unavailable',
     reason: unavailableReason,
     version: vscode.version,
+    features: {
+      chat_completions: true,
+      streaming: true,
+      tool_calling: true,
+      function_calling: true, // deprecated but supported
+      models_list: true
+    },
+    active_requests: state.activeRequests,
+    model_attempted: state.modelAttempted
   });
 };
diff --git a/src/http/routes/models.ts b/src/http/routes/models.ts
index 10ea4e9..3e2569b 100644
--- a/src/http/routes/models.ts
+++ b/src/http/routes/models.ts
@@ -4,17 +4,29 @@ import type { ServerResponse } from 'http';
 
 export const handleModelsRequest = async (res: ServerResponse): Promise<void> => {
   try {
-    const models = await listCopilotModels();
+    const modelIds = await listCopilotModels();
+    const models = modelIds.map((id: string) => ({
+      id,
+      object: 'model',
+      created: Math.floor(Date.now() / 1000),
+      owned_by: 'copilot',
+      permission: [],
+      root: id,
+      parent: null,
+    }));
+
     writeJson(res, 200, {
-      data: models.map((id: string) => ({
-        id,
-        object: 'model',
-        owned_by: 'vscode-bridge',
-      })),
+      object: 'list',
+      data: models,
     });
-  } catch {
-    writeJson(res, 200, {
-      data: [],
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    writeJson(res, 500, {
+      error: {
+        message: msg || 'Failed to list models',
+        type: 'server_error',
+        code: 'internal_error'
+      }
     });
   }
 };
diff --git a/src/messages.ts b/src/messages.ts
index ac9d2c8..1ee8f1b 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -1,8 +1,12 @@
 import * as vscode from 'vscode';
 
 export interface ChatMessage {
-  readonly role: 'system' | 'user' | 'assistant';
-  readonly content: string | MessageContent[];
+  readonly role: 'system' | 'user' | 'assistant' | 'tool';
+  readonly content?: string | MessageContent[] | null;
+  readonly name?: string;
+  readonly tool_calls?: ToolCall[];
+  readonly tool_call_id?: string;
+  readonly function_call?: FunctionCall;
 }
 
 export interface MessageContent {
@@ -11,22 +15,87 @@ export interface MessageContent {
   readonly [key: string]: unknown;
 }
 
+export interface ToolCall {
+  readonly id: string;
+  readonly type: 'function';
+  readonly function: FunctionCall;
+}
+
+export interface FunctionCall {
+  readonly name: string;
+  readonly arguments: string;
+}
+
+export interface Tool {
+  readonly type: 'function';
+  readonly function: ToolFunction;
+}
+
+export interface ToolFunction {
+  readonly name: string;
+  readonly description?: string;
+  readonly parameters?: object;
+}
+
 export interface ChatCompletionRequest {
   readonly model?: string;
   readonly messages: ChatMessage[];
   readonly stream?: boolean;
+  readonly tools?: Tool[];
+  readonly tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } };
+  readonly parallel_tool_calls?: boolean;
+  readonly functions?: ToolFunction[]; // Deprecated, use tools instead
+  readonly function_call?: 'none' | 'auto' | { name: string }; // Deprecated, use tool_choice instead
+  readonly temperature?: number;
+  readonly top_p?: number;
+  readonly n?: number;
+  readonly stop?: string | string[];
+  readonly max_tokens?: number;
+  readonly max_completion_tokens?: number;
+  readonly presence_penalty?: number;
+  readonly frequency_penalty?: number;
+  readonly logit_bias?: Record<string, number>;
+  readonly logprobs?: boolean;
+  readonly top_logprobs?: number;
+  readonly user?: string;
+  readonly seed?: number;
+  readonly response_format?: {
+    readonly type: 'text' | 'json_object' | 'json_schema';
+    readonly json_schema?: {
+      readonly name: string;
+      readonly schema: object;
+      readonly strict?: boolean;
+    };
+  };
   readonly [key: string]: unknown;
 }
 
-const VALID_ROLES = ['system', 'user', 'assistant'] as const;
+const VALID_ROLES = ['system', 'user', 'assistant', 'tool'] as const;
 type Role = typeof VALID_ROLES[number];
 const isValidRole = (role: unknown): role is Role => typeof role === 'string' && VALID_ROLES.includes(role as Role);
 
 export const isChatMessage = (msg: unknown): msg is ChatMessage => {
   if (typeof msg !== 'object' || msg === null) return false;
   const candidate = msg as Record<string, unknown>;
-  if (!('role' in candidate) || !('content' in candidate)) return false;
-  return isValidRole(candidate.role) && candidate.content !== undefined && candidate.content !== null;
+  if (!('role' in candidate)) return false;
+  if (!isValidRole(candidate.role)) return false;
+  
+  // Tool messages require tool_call_id and content
+  if (candidate.role === 'tool') {
+    return typeof candidate.tool_call_id === 'string' && 
+           (typeof candidate.content === 'string' || candidate.content === null);
+  }
+  
+  // Assistant messages can have content and/or tool_calls/function_call
+  if (candidate.role === 'assistant') {
+    const hasContent = candidate.content !== undefined;
+    const hasToolCalls = Array.isArray(candidate.tool_calls);
+    const hasFunctionCall = typeof candidate.function_call === 'object' && candidate.function_call !== null;
+    return hasContent || hasToolCalls || hasFunctionCall;
+  }
+  
+  // System and user messages must have content
+  return candidate.content !== undefined && candidate.content !== null;
 };
 
 export const isChatCompletionRequest = (body: unknown): body is ChatCompletionRequest => {
@@ -37,6 +106,25 @@ export const isChatCompletionRequest = (body: unknown): body is ChatCompletionRe
   return Array.isArray(messages) && messages.length > 0 && messages.every(isChatMessage);
 };
 
+// Convert OpenAI tools to VS Code Language Model tools
+export const convertOpenAIToolsToLM = (tools?: Tool[]): vscode.LanguageModelChatTool[] => {
+  if (!tools) return [];
+  return tools.map(tool => ({
+    name: tool.function.name,
+    description: tool.function.description || '',
+    inputSchema: tool.function.parameters
+  }));
+};
+
+// Convert deprecated functions to tools format
+export const convertFunctionsToTools = (functions?: ToolFunction[]): Tool[] => {
+  if (!functions) return [];
+  return functions.map(func => ({
+    type: 'function' as const,
+    function: func
+  }));
+};
+
 const toText = (content: unknown): string => {
   if (typeof content === 'string') return content;
   if (Array.isArray(content)) return content.map(toText).join('\n');

From ef1526c76a14c26792f1e54d3c99b8c51977c97f Mon Sep 17 00:00:00 2001
From: Lars Baunwall <larslb@thinkability.dk>
Date: Mon, 29 Sep 2025 17:59:52 +0200
Subject: [PATCH 2/3] Add copilot instructions

---
 .github/copilot-instructions.md         |   5 +
 .github/instructions/ts.instructions.md | 134 ++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 .github/copilot-instructions.md
 create mode 100644 .github/instructions/ts.instructions.md

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..b7978f2
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,5 @@
+# Copilot instructions
+
+Look carefully through [AGENTS.md](../AGENTS.md) for a description of the project and how to contribute.
+
+Follow instructions carefully.
\ No newline at end of file
diff --git a/.github/instructions/ts.instructions.md b/.github/instructions/ts.instructions.md
new file mode 100644
index 0000000..e3e56a1
--- /dev/null
+++ b/.github/instructions/ts.instructions.md
@@ -0,0 +1,134 @@
+---
+description: 'Guidelines for TypeScript Development targeting TypeScript 5.x and ES2022 output'
+applyTo: '**/*.ts'
+---
+
+# TypeScript Development
+
+> These instructions assume projects are built with TypeScript 5.x (or newer) compiling to an ES2022 JavaScript baseline. Adjust guidance if your runtime requires older language targets or down-level transpilation.
+
+## Core Intent
+
+- Respect the existing architecture and coding standards.
+- Prefer readable, explicit solutions over clever shortcuts.
+- Extend current abstractions before inventing new ones.
+- Prioritize maintainability and clarity: short methods and classes, clean code.
+
+## Programming Language: TypeScript
+
+**TypeScript Best Practices:**
+- Use strict TypeScript configuration with `"strict": true`
+- Prefer interfaces over type aliases for object shapes
+- Use explicit return types for all public functions
+- Avoid the `any` type; use `unknown` or proper typing instead
+- Use utility types (Pick, Omit, Partial) for type transformations
+- Implement proper null/undefined checking
+
+## Code Style: Clean Code
+
+**Clean Code Principles:**
+- Write self-documenting code with meaningful names
+- Keep functions small and focused on a single responsibility
+- Avoid deep nesting and complex conditional statements
+- Use consistent formatting and indentation
+- Write code that tells a story and is easy to understand
+- Refactor ruthlessly to eliminate code smells
+
+## General Guardrails
+
+- Target TypeScript 5.x / ES2022 and prefer native features over polyfills.
+- Use pure ES modules; never emit `require`, `module.exports`, or CommonJS helpers.
+- Rely on the project's build, lint, and test scripts unless asked otherwise.
+- Note design trade-offs when intent is not obvious.
+
+## Project Organization
+
+- Follow the repository's folder and responsibility layout for new code.
+- Use kebab-case filenames (e.g., `user-session.ts`, `data-service.ts`) unless told otherwise.
+- Keep tests, types, and helpers near their implementation when it aids discovery.
+- Reuse or extend shared utilities before adding new ones.
+
+## Naming & Style
+
+- Use PascalCase for classes, interfaces, enums, and type aliases; camelCase for everything else.
+- Skip interface prefixes like `I`; rely on descriptive names.
+- Name things for their behavior or domain meaning, not implementation.
+
+## Formatting & Style
+
+- Run the repository's lint/format scripts (e.g., `npm run lint`) before submitting.
+- Match the project's indentation, quote style, and trailing comma rules.
+- Keep functions focused; extract helpers when logic branches grow.
+- Favor immutable data and pure functions when practical.
+
+## Type System Expectations
+
+- Avoid `any` (implicit or explicit); prefer `unknown` plus narrowing.
+- Use discriminated unions for realtime events and state machines.
+- Centralize shared contracts instead of duplicating shapes.
+- Express intent with TypeScript utility types (e.g., `Readonly`, `Partial`, `Record`).
+
+## Async, Events & Error Handling
+
+- Use `async/await`; wrap awaits in try/catch with structured errors.
+- Guard edge cases early to avoid deep nesting.
+- Send errors through the project's logging/telemetry utilities.
+- Surface user-facing errors via the repository's notification pattern.
+- Debounce configuration-driven updates and dispose resources deterministically.
+
+## Architecture & Patterns
+
+- Follow the repository's dependency injection or composition pattern; keep modules single-purpose.
+- Observe existing initialization and disposal sequences when wiring into lifecycles.
+- Keep transport, domain, and presentation layers decoupled with clear interfaces.
+- Supply lifecycle hooks (e.g., `initialize`, `dispose`) and targeted tests when adding services.
+
+## External Integrations
+
+- Instantiate clients outside hot paths and inject them for testability.
+- Never hardcode secrets; load them from secure sources.
+- Apply retries, backoff, and cancellation to network or IO calls.
+- Normalize external responses and map errors to domain shapes.
+
+## Security Practices
+
+- Validate and sanitize external input with schema validators or type guards.
+- Avoid dynamic code execution and untrusted template rendering.
+- Encode untrusted content before rendering HTML; use framework escaping or trusted types.
+- Use parameterized queries or prepared statements to block injection.
+- Keep secrets in secure storage, rotate them regularly, and request least-privilege scopes.
+- Favor immutable flows and defensive copies for sensitive data.
+- Use vetted crypto libraries only.
+- Patch dependencies promptly and monitor advisories.
+
+## Configuration & Secrets
+
+- Reach configuration through shared helpers and validate with schemas or dedicated validators.
+- Handle secrets via the project's secure storage; guard `undefined` and error states.
+- Document new configuration keys and update related tests.
+
+## UI & UX Components
+
+- Sanitize user or external content before rendering.
+- Keep UI layers thin; push heavy logic to services or state managers.
+- Use messaging or events to decouple UI from business logic.
+
+## Testing Expectations
+
+- Add or update unit tests with the project's framework and naming style.
+- Expand integration or end-to-end suites when behavior crosses modules or platform APIs.
+- Run targeted test scripts for quick feedback before submitting.
+- Avoid brittle timing assertions; prefer fake timers or injected clocks.
+
+## Performance & Reliability
+
+- Lazy-load heavy dependencies and dispose them when done.
+- Defer expensive work until users need it.
+- Batch or debounce high-frequency events to reduce thrash.
+- Track resource lifetimes to prevent leaks.
+
+## Documentation & Comments
+
+- Add JSDoc to public APIs; include `@remarks` or `@example` when helpful.
+- Write comments that capture intent, and remove stale notes during refactors.
+- Update architecture or design docs when introducing significant patterns.
\ No newline at end of file

From 70a077ca518cf91ba8664be5186b02f80c0246e3 Mon Sep 17 00:00:00 2001
From: Lars Baunwall <larslb@thinkability.dk>
Date: Mon, 29 Sep 2025 18:27:29 +0200
Subject: [PATCH 3/3] Refactor code to be more clean and modularized. Bump
 package version

---
 package.json                       |   2 +-
 src/http/routes/chat.ts            | 307 +++++++----------------------
 src/http/routes/health.ts          |  33 +++-
 src/http/routes/models.ts          |  39 ++--
 src/models.ts                      |   3 +-
 src/services/model-service.ts      |  99 ++++++++++
 src/services/request-processor.ts  |  39 ++++
 src/services/response-formatter.ts | 158 +++++++++++++++
 src/services/streaming-handler.ts  | 190 ++++++++++++++++++
 src/types/openai-types.ts          |  81 ++++++++
 tsconfig.json                      |   1 +
 11 files changed, 701 insertions(+), 251 deletions(-)
 create mode 100644 src/services/model-service.ts
 create mode 100644 src/services/request-processor.ts
 create mode 100644 src/services/response-formatter.ts
 create mode 100644 src/services/streaming-handler.ts
 create mode 100644 src/types/openai-types.ts

diff --git a/package.json b/package.json
index 7cb4887..c081009 100644
--- a/package.json
+++ b/package.json
@@ -4,7 +4,7 @@
   "name": "copilot-bridge",
   "displayName": "Copilot Bridge",
   "description": "Local OpenAI-compatible chat endpoint (inference) bridging to GitHub Copilot via the VS Code Language Model API.",
-  "version": "0.2.2",
+  "version": "1.0.0",
   "publisher": "thinkability",
   "repository": {
     "type": "git",
diff --git a/src/http/routes/chat.ts b/src/http/routes/chat.ts
index 190f7ad..a450482 100644
--- a/src/http/routes/chat.ts
+++ b/src/http/routes/chat.ts
@@ -1,54 +1,20 @@
 import * as vscode from 'vscode';
 import type { IncomingMessage, ServerResponse } from 'http';
 import { state } from '../../state';
-import { getBridgeConfig } from '../../config';
-import { isChatCompletionRequest, normalizeMessagesLM, convertOpenAIToolsToLM, convertFunctionsToTools } from '../../messages';
-import { getModel, hasLMApi } from '../../models';
-import { readJson, writeErrorResponse, writeJson } from '../utils';
+import { isChatCompletionRequest, type ChatCompletionRequest } from '../../messages';
+import { readJson, writeErrorResponse } from '../utils';
 import { verbose } from '../../log';
+import { ModelService } from '../../services/model-service';
+import { StreamingResponseHandler } from '../../services/streaming-handler';
+import { processLanguageModelResponse, sendCompletionResponse } from '../../services/response-formatter';
+import type { ChatCompletionContext } from '../../types/openai-types';
 
-// OpenAI response interfaces for better typing
-interface OpenAIToolCall {
-  id: string;
-  type: 'function';
-  function: {
-    name: string;
-    arguments: string;
-  };
-}
-
-interface OpenAIMessage {
-  role: 'assistant';
-  content: string | null;
-  tool_calls?: OpenAIToolCall[];
-  function_call?: {
-    name: string;
-    arguments: string;
-  };
-}
-
-interface OpenAIChoice {
-  index: number;
-  message?: OpenAIMessage;
-  delta?: Partial<OpenAIMessage>;
-  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
-}
-
-interface OpenAIResponse {
-  id: string;
-  object: 'chat.completion' | 'chat.completion.chunk';
-  created: number;
-  model: string;
-  choices: OpenAIChoice[];
-  usage?: {
-    prompt_tokens: number;
-    completion_tokens: number;
-    total_tokens: number;
-  };
-}
-
-export const handleChatCompletion = async (req: IncomingMessage, res: ServerResponse): Promise<void> => {
-  const config = getBridgeConfig();
+/**
+ * Handles OpenAI-compatible chat completion requests with support for streaming and tool calling
+ * @param req - HTTP request object
+ * @param res - HTTP response object
+ */
+export async function handleChatCompletion(req: IncomingMessage, res: ServerResponse): Promise<void> {
   state.activeRequests++;
   verbose(`Request started (active=${state.activeRequests})`);
 
@@ -58,202 +24,75 @@ export const handleChatCompletion = async (req: IncomingMessage, res: ServerResp
       return writeErrorResponse(res, 400, 'invalid request', 'invalid_request_error', 'invalid_payload');
     }
 
-    const requestedModel = body.model;
-    const stream = body.stream !== false; // default true
+    const modelService = new ModelService();
     
-    // Handle tools and deprecated functions
-    let tools = body.tools || [];
-    if (body.functions) {
-      // Convert deprecated functions to tools format
-      tools = [...tools, ...convertFunctionsToTools(body.functions)];
-    }
-    
-    const model = await getModel(false, requestedModel);
-
-    if (!model) {
-      const hasLM = hasLMApi();
-      if (requestedModel && hasLM) {
-        state.lastReason = 'not_found';
-        return writeErrorResponse(res, 404, 'model not found', 'invalid_request_error', 'model_not_found', 'not_found');
-      }
-      const reason = !hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable');
-      return writeErrorResponse(res, 503, 'Copilot unavailable', 'server_error', 'copilot_unavailable', reason);
+    // Validate model availability (a named model can still get a 503 when the LM API is missing)
+    const modelValidation = await modelService.validateModel(body.model);
+    if (!modelValidation.isValid) {
+      const errorMessage = modelValidation.statusCode === 404 ? 'model not found' : 'Copilot unavailable';
+      return writeErrorResponse(
+        res,
+        modelValidation.statusCode!,
+        errorMessage,
+        modelValidation.errorType!,
+        modelValidation.errorCode!,
+        modelValidation.reason || 'unknown_error'
+      );
     }
 
-    const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow) as vscode.LanguageModelChatMessage[];
-    const lmTools = convertOpenAIToolsToLM(tools);
+    // Create processing context
+    const context = await modelService.createProcessingContext(body);
+    const chatContext = modelService.createChatCompletionContext(body, context.lmTools.length > 0);
     
-    // Prepare request options for Language Model API
-    const requestOptions: any = {};
-    if (lmTools.length > 0) {
-      requestOptions.tools = lmTools;
-    }
-    
-    verbose(`LM request via API model=${model.family || model.id || model.name || 'unknown'} tools=${lmTools.length}`);
+    verbose(`LM request via API model=${context.model.family || context.model.id || context.model.name || 'unknown'} tools=${context.lmTools.length}`);
 
-    const cts = new vscode.CancellationTokenSource();
-    const response = await model.sendRequest(lmMessages, requestOptions, cts.token);
-    await sendResponse(res, response, stream, body, tools);
-  } catch (e) {
-    const msg = e instanceof Error ? e.message : String(e);
-    writeErrorResponse(res, 500, msg || 'internal_error', 'server_error', 'internal_error');
+    // Execute the Language Model request
+    const cancellationToken = new vscode.CancellationTokenSource();
+    const response = await context.model.sendRequest(
+      context.lmMessages, 
+      context.requestOptions, 
+      cancellationToken.token
+    );
+
+    // Handle response based on streaming preference
+    if (chatContext.isStreaming) {
+      await handleStreamingResponse(res, response, chatContext, body);
+    } else {
+      await handleNonStreamingResponse(res, response, chatContext, body);
+    }
+
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    writeErrorResponse(res, 500, errorMessage || 'internal_error', 'server_error', 'internal_error');
   } finally {
     state.activeRequests--;
     verbose(`Request complete (active=${state.activeRequests})`);
   }
-};
+}
 
-const sendResponse = async (
-  res: ServerResponse, 
-  response: vscode.LanguageModelChatResponse, 
-  stream: boolean,
-  requestBody?: any,
-  tools?: any[]
-): Promise<void> => {
-  const modelName = requestBody?.model || 'copilot';
-  const responseId = `chatcmpl-${Math.random().toString(36).slice(2)}`;
-  const created = Math.floor(Date.now() / 1000);
+/**
+ * Streams the Language Model response to the client as Server-Sent Events
+ */
+async function handleStreamingResponse(
+  res: ServerResponse,
+  response: vscode.LanguageModelChatResponse,
+  chatContext: ChatCompletionContext,
+  requestBody: ChatCompletionRequest
+): Promise<void> {
+  const sse = new StreamingResponseHandler(res, chatContext, requestBody);
+  sse.initializeStream();
+  await sse.processAndStreamResponse(response);
+}
 
-  if (stream) {
-    res.writeHead(200, {
-      'Content-Type': 'text/event-stream',
-      'Cache-Control': 'no-cache',
-      'Connection': 'keep-alive',
-    });
-    
-    verbose(`SSE start id=${responseId}`);
-    
-    let toolCalls: OpenAIToolCall[] = [];
-    
-    for await (const part of response.stream) {
-      // Check if this part is a LanguageModelToolCallPart
-      if (part && typeof part === 'object' && 'callId' in part && 'name' in part && 'input' in part) {
-        const toolCallPart = part as vscode.LanguageModelToolCallPart;
-        const toolCall: OpenAIToolCall = {
-          id: toolCallPart.callId,
-          type: 'function',
-          function: {
-            name: toolCallPart.name,
-            arguments: JSON.stringify(toolCallPart.input)
-          }
-        };
-        toolCalls.push(toolCall);
-        
-        // Send tool call in streaming format
-        const chunkResponse: OpenAIResponse = {
-          id: responseId,
-          object: 'chat.completion.chunk',
-          created,
-          model: modelName,
-          choices: [{
-            index: 0,
-            delta: {
-              tool_calls: [toolCall]
-            },
-            finish_reason: null
-          }]
-        };
-        res.write(`data: ${JSON.stringify(chunkResponse)}\n\n`);
-      } else if (typeof part === 'string' || (part && typeof part === 'object' && 'value' in part)) {
-        // Handle text content
-        const content = typeof part === 'string' ? part : (part as any).value || '';
-        if (content) {
-          const chunkResponse: OpenAIResponse = {
-            id: responseId,
-            object: 'chat.completion.chunk',
-            created,
-            model: modelName,
-            choices: [{
-              index: 0,
-              delta: { content },
-              finish_reason: null
-            }]
-          };
-          res.write(`data: ${JSON.stringify(chunkResponse)}\n\n`);
-        }
-      }
-    }
-    
-    // Send final chunk
-    const finishReason: OpenAIChoice['finish_reason'] = toolCalls.length > 0 ? 'tool_calls' : 'stop';
-    const finalChunkResponse: OpenAIResponse = {
-      id: responseId,
-      object: 'chat.completion.chunk',
-      created,
-      model: modelName,
-      choices: [{
-        index: 0,
-        delta: {},
-        finish_reason: finishReason
-      }]
-    };
-    res.write(`data: ${JSON.stringify(finalChunkResponse)}\n\n`);
-    
-    verbose(`SSE end id=${responseId}`);
-    res.write('data: [DONE]\n\n');
-    res.end();
-    return;
-  }
-
-  // Non-streaming response
-  let content = '';
-  let toolCalls: OpenAIToolCall[] = [];
-  
-  for await (const part of response.stream) {
-    if (part && typeof part === 'object' && 'callId' in part && 'name' in part && 'input' in part) {
-      // Handle VS Code LanguageModelToolCallPart
-      const toolCallPart = part as vscode.LanguageModelToolCallPart;
-      const toolCall: OpenAIToolCall = {
-        id: toolCallPart.callId,
-        type: 'function',
-        function: {
-          name: toolCallPart.name,
-          arguments: JSON.stringify(toolCallPart.input)
-        }
-      };
-      toolCalls.push(toolCall);
-    } else if (typeof part === 'string' || (part && typeof part === 'object' && 'value' in part)) {
-      // Handle text content
-      content += typeof part === 'string' ? part : (part as any).value || '';
-    }
-  }
-  
-  verbose(`Non-stream complete len=${content.length} tool_calls=${toolCalls.length}`);
-  
-  const message: OpenAIMessage = {
-    role: 'assistant',
-    content: toolCalls.length > 0 ? null : content,
-  };
-  
-  // Add tool_calls if present
-  if (toolCalls.length > 0) {
-    message.tool_calls = toolCalls;
-    
-    // For backward compatibility, also add function_call if there's exactly one tool call
-    if (toolCalls.length === 1 && requestBody?.function_call !== undefined) {
-      message.function_call = {
-        name: toolCalls[0].function.name,
-        arguments: toolCalls[0].function.arguments
-      };
-    }
-  }
-  
-  const responseObj: OpenAIResponse = {
-    id: responseId,
-    object: 'chat.completion',
-    created,
-    model: modelName,
-    choices: [{
-      index: 0,
-      message,
-      finish_reason: toolCalls.length > 0 ? 'tool_calls' : 'stop',
-    }],
-    usage: {
-      prompt_tokens: 0, // VS Code API doesn't provide token counts
-      completion_tokens: 0,
-      total_tokens: 0
-    }
-  };
-  
-  writeJson(res, 200, responseObj);
-};
+/**
+ * Drains the whole Language Model response, then replies with a single JSON completion
+ */
+async function handleNonStreamingResponse(
+  res: ServerResponse,
+  response: vscode.LanguageModelChatResponse,
+  chatContext: ChatCompletionContext,
+  requestBody: ChatCompletionRequest
+): Promise<void> {
+  const collected = await processLanguageModelResponse(response);
+  sendCompletionResponse(res, chatContext, collected, requestBody);
+}
diff --git a/src/http/routes/health.ts b/src/http/routes/health.ts
index 23c4874..2da6ba8 100644
--- a/src/http/routes/health.ts
+++ b/src/http/routes/health.ts
@@ -5,17 +5,42 @@ import { hasLMApi, getModel } from '../../models';
 import { state } from '../../state';
 import { verbose } from '../../log';
 
+interface HealthResponse { // Shape of the JSON body that handleHealthCheck passes to writeJson
+  readonly ok: boolean;
+  readonly status: string;
+  readonly copilot: string; // 'ok' when state.modelCache is set, otherwise 'unavailable'
+  readonly reason?: string;
+  readonly version: string;
+  readonly features: {
+    readonly chat_completions: boolean;
+    readonly streaming: boolean;
+    readonly tool_calling: boolean;
+    readonly function_calling: boolean;
+    readonly models_list: boolean;
+  };
+  readonly active_requests: number; // copied from state.activeRequests
+  readonly model_attempted?: boolean; // copied from state.modelAttempted
+}
+
 export const handleHealthCheck = async (res: ServerResponse, v: boolean): Promise<void> => {
   const hasLM = hasLMApi();
+  
+  // Attempt model resolution if cache is empty and verbose logging is enabled
   if (!state.modelCache && v) {
     verbose(`Healthz: model=${state.modelCache ? 'present' : 'missing'} lmApi=${hasLM ? 'ok' : 'missing'}`);
-    await getModel();
+    try {
+      await getModel();
+    } catch (e) {
+      const msg = e instanceof Error ? e.message : String(e);
+      verbose(`Health check model resolution failed: ${msg}`);
+    }
   }
+  
   const unavailableReason = state.modelCache
     ? undefined
     : (!hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable'));
   
-  writeJson(res, 200, {
+  const response: HealthResponse = {
     ok: true,
     status: 'operational',
     copilot: state.modelCache ? 'ok' : 'unavailable',
@@ -30,5 +55,7 @@ export const handleHealthCheck = async (res: ServerResponse, v: boolean): Promis
     },
     active_requests: state.activeRequests,
     model_attempted: state.modelAttempted
-  });
+  };
+  
+  writeJson(res, 200, response);
 };
diff --git a/src/http/routes/models.ts b/src/http/routes/models.ts
index 3e2569b..039c0ad 100644
--- a/src/http/routes/models.ts
+++ b/src/http/routes/models.ts
@@ -1,13 +1,31 @@
-import { writeJson } from '../utils';
+import { writeJson, writeErrorResponse } from '../utils';
 import { listCopilotModels } from '../../models';
+import { verbose } from '../../log';
 import type { ServerResponse } from 'http';
 
+interface ModelObject { // One entry of the `data` array returned by handleModelsRequest
+  readonly id: string; // identifier string as returned by listCopilotModels
+  readonly object: 'model';
+  readonly created: number; // Unix time in seconds, taken when the list is built
+  readonly owned_by: string; // always 'copilot' in handleModelsRequest
+  readonly permission: readonly unknown[]; // always emitted as an empty array
+  readonly root: string;
+  readonly parent: null;
+}
+
+interface ModelsListResponse { // JSON body written by handleModelsRequest on success
+  readonly object: 'list';
+  readonly data: readonly ModelObject[]; // one ModelObject per id from listCopilotModels
+}
+
 export const handleModelsRequest = async (res: ServerResponse): Promise<void> => {
   try {
     const modelIds = await listCopilotModels();
-    const models = modelIds.map((id: string) => ({
+    verbose(`Models listed: ${modelIds.length} available`);
+    
+    const models: ModelObject[] = modelIds.map((id: string) => ({
       id,
-      object: 'model',
+      object: 'model' as const,
       created: Math.floor(Date.now() / 1000),
       owned_by: 'copilot',
       permission: [],
@@ -15,18 +33,15 @@ export const handleModelsRequest = async (res: ServerResponse): Promise<void> =>
       parent: null,
     }));
 
-    writeJson(res, 200, {
+    const response: ModelsListResponse = {
       object: 'list',
       data: models,
-    });
+    };
+    
+    writeJson(res, 200, response);
   } catch (e) {
     const msg = e instanceof Error ? e.message : String(e);
-    writeJson(res, 500, {
-      error: {
-        message: msg || 'Failed to list models',
-        type: 'server_error',
-        code: 'internal_error'
-      }
-    });
+    verbose(`Models request failed: ${msg}`);
+    writeErrorResponse(res, 500, msg || 'Failed to list models', 'server_error', 'internal_error');
   }
 };
diff --git a/src/models.ts b/src/models.ts
index 40d78d4..d7174e9 100644
--- a/src/models.ts
+++ b/src/models.ts
@@ -65,7 +65,8 @@ export const handleModelSelectionError = (error: unknown, family?: string): void
 
 export const listCopilotModels = async (): Promise<string[]> => {
   try {
-    const models = await selectChatModels();
+    // Filter for Copilot models only, consistent with getModel behavior
+    const models = await vscode.lm.selectChatModels({ vendor: 'copilot' });
     const ids = models.map((m: vscode.LanguageModelChat) => {
       const normalized = m.family || m.id || m.name || 'copilot';
       return `${normalized}`;
diff --git a/src/services/model-service.ts b/src/services/model-service.ts
new file mode 100644
index 0000000..562f899
--- /dev/null
+++ b/src/services/model-service.ts
@@ -0,0 +1,99 @@
+import type * as vscode from 'vscode';
+import type { ChatCompletionRequest } from '../messages';
+import type {
+  ModelValidationResult,
+  RequestProcessingContext,
+  ChatCompletionContext
+} from '../types/openai-types';
+import {
+  extractAndMergeTools,
+  createLanguageModelRequestOptions
+} from './request-processor';
+import { getModel, hasLMApi } from '../models';
+import { normalizeMessagesLM, convertOpenAIToolsToLM } from '../messages';
+import { getBridgeConfig } from '../config';
+import { state } from '../state';
+
+/**
+ * Service for validating models and creating request processing context
+ */
+export class ModelService {
+  /**
+   * Validates the requested model and returns appropriate error details if invalid
+   * @param requestedModel - The model identifier from the request
+   * @returns `{ isValid: true }`, or a 404 (named model missing) / 503 (Copilot unavailable) result
+   */
+  public async validateModel(requestedModel?: string): Promise<ModelValidationResult> {
+    const model = await getModel(false, requestedModel);
+    if (!model) {
+      const hasLM = hasLMApi();
+      if (requestedModel && hasLM) {
+        state.lastReason = 'not_found'; // shared diagnostic, also read by the health route
+        return {
+          isValid: false,
+          statusCode: 404,
+          errorType: 'invalid_request_error',
+          errorCode: 'model_not_found',
+          reason: 'not_found'
+        };
+      }
+
+      // Prefer the last recorded failure reason over the generic fallback, as the health route does
+      const reason = !hasLM ? 'missing_language_model_api' : (state.lastReason || 'copilot_model_unavailable');
+      return {
+        isValid: false,
+        statusCode: 503,
+        errorType: 'server_error',
+        errorCode: 'copilot_unavailable',
+        reason
+      };
+    }
+
+    return { isValid: true };
+  }
+
+  /**
+   * Creates a complete request processing context from validated inputs
+   * @param body - The validated chat completion request
+   * @returns Processing context with all required elements for the Language Model API
+   */
+  public async createProcessingContext(body: ChatCompletionRequest): Promise<RequestProcessingContext> {
+    const model = await getModel(false, body.model);
+    if (!model) {
+      throw new Error('Model validation should be performed before creating processing context');
+    }
+
+    const config = getBridgeConfig();
+    const mergedTools = extractAndMergeTools(body);
+    const lmMessages = normalizeMessagesLM(body.messages, config.historyWindow);
+    const lmTools = convertOpenAIToolsToLM(mergedTools);
+    const requestOptions = createLanguageModelRequestOptions(lmTools);
+
+    return {
+      model,
+      lmMessages: lmMessages as vscode.LanguageModelChatMessage[],
+      lmTools,
+      requestOptions,
+      mergedTools
+    };
+  }
+
+  /**
+   * Creates chat completion context for response formatting
+   * @param body - The chat completion request
+   * @param hasTools - Whether tools are present in the request
+   * @returns Context object for response handling
+   */
+  public createChatCompletionContext(
+    body: ChatCompletionRequest,
+    hasTools: boolean
+  ): ChatCompletionContext {
+    return {
+      requestId: `chatcmpl-${Math.random().toString(36).slice(2)}`,
+      modelName: body.model || 'copilot',
+      created: Math.floor(Date.now() / 1000),
+      hasTools,
+      isStreaming: body.stream !== false
+    };
+  }
+}
\ No newline at end of file
diff --git a/src/services/request-processor.ts b/src/services/request-processor.ts
new file mode 100644
index 0000000..ccdcf82
--- /dev/null
+++ b/src/services/request-processor.ts
@@ -0,0 +1,39 @@
+import type { ChatCompletionRequest, Tool } from '../messages';
+import type * as vscode from 'vscode';
+
+/**
+ * Collects the tools declared on a request, folding deprecated `functions` into tool form
+ * @param body - The parsed request body
+ * @returns `body.tools` followed by one function-type tool per `body.functions` entry
+ */
+export function extractAndMergeTools(body: ChatCompletionRequest): Tool[] {
+  const declaredTools = body.tools || [];
+  const legacyFunctions = body.functions;
+  if (!legacyFunctions) {
+    return declaredTools;
+  }
+
+  // Wrap each deprecated function definition in the tool envelope
+  const wrapped = legacyFunctions.map((definition): Tool => ({
+    type: 'function' as const,
+    function: definition
+  }));
+  return [...declaredTools, ...wrapped];
+}
+
+/**
+ * Builds the options object passed to the Language Model API for a request
+ * @param lmTools - Array of Language Model compatible tools
+ * @returns Options carrying `tools` only when at least one tool was supplied
+ */
+export function createLanguageModelRequestOptions(
+  lmTools: vscode.LanguageModelChatTool[]
+): vscode.LanguageModelChatRequestOptions {
+  // An empty tool list is omitted entirely rather than sent as `tools: []`
+  if (lmTools.length === 0) {
+    return {};
+  }
+
+  const withTools: vscode.LanguageModelChatRequestOptions = { tools: lmTools };
+  return withTools;
+}
\ No newline at end of file
diff --git a/src/services/response-formatter.ts b/src/services/response-formatter.ts
new file mode 100644
index 0000000..dce7f4a
--- /dev/null
+++ b/src/services/response-formatter.ts
@@ -0,0 +1,158 @@
+import type * as vscode from 'vscode';
+import type { ServerResponse } from 'http';
+import type { 
+  OpenAIResponse, 
+  OpenAIChoice, 
+  OpenAIMessage, 
+  OpenAIToolCall, 
+  ChatCompletionContext,
+  ProcessedResponseData 
+} from '../types/openai-types';
+import type { ChatCompletionRequest } from '../messages';
+import { writeJson } from '../http/utils';
+import { verbose } from '../log';
+
+/**
+ * Reads the whole Language Model stream and collects its text and tool calls
+ * @param response - The VS Code Language Model chat response
+ * @returns Concatenated text, collected tool calls, and the matching finish reason
+ */
+export async function processLanguageModelResponse(
+  response: vscode.LanguageModelChatResponse
+): Promise<ProcessedResponseData> {
+  const textFragments: string[] = [];
+  const toolCalls: OpenAIToolCall[] = [];
+
+  for await (const chunk of response.stream) {
+    if (isToolCallPart(chunk)) {
+      // Tool input is serialized to a JSON string for the OpenAI `arguments` field
+      toolCalls.push({
+        id: chunk.callId,
+        type: 'function',
+        function: {
+          name: chunk.name,
+          arguments: JSON.stringify(chunk.input)
+        }
+      });
+      continue;
+    }
+    if (isTextPart(chunk)) {
+      textFragments.push(extractTextContent(chunk));
+    }
+  }
+
+  return {
+    content: textFragments.join(''),
+    toolCalls,
+    finishReason: toolCalls.length > 0 ? 'tool_calls' : 'stop'
+  };
+}
+
+/**
+ * Builds the assistant message for a completed response
+ * @param data - The processed response data
+ * @param requestBody - Original request; a defined `function_call` enables the legacy mirror field
+ * @returns Text-only message, or a null-content message carrying the tool calls
+ */
+export function createOpenAIMessage(
+  data: ProcessedResponseData,
+  requestBody?: ChatCompletionRequest
+): OpenAIMessage {
+  const { content, toolCalls } = data;
+
+  // Plain text reply: no tool calls were produced
+  if (toolCalls.length === 0) {
+    return { role: 'assistant' as const, content };
+  }
+
+  // Tool-call reply: content is nulled out and the calls are attached
+  const toolMessage = {
+    role: 'assistant' as const,
+    content: null,
+    tool_calls: toolCalls,
+  };
+
+  // Requests that set `function_call` also get a single call mirrored in the legacy field
+  const wantsLegacyShape = toolCalls.length === 1 && requestBody?.function_call !== undefined;
+  if (!wantsLegacyShape) {
+    return toolMessage;
+  }
+  const onlyCall = toolCalls[0];
+  return {
+    ...toolMessage,
+    function_call: {
+      name: onlyCall.function.name,
+      arguments: onlyCall.function.arguments
+    }
+  };
+}
+
+/**
+ * Writes a complete (non-streaming) chat completion as a 200 JSON response
+ * @param res - HTTP response object
+ * @param context - Chat completion context
+ * @param data - Processed response data
+ * @param requestBody - Original request body
+ */
+export function sendCompletionResponse(
+  res: ServerResponse,
+  context: ChatCompletionContext,
+  data: ProcessedResponseData,
+  requestBody?: ChatCompletionRequest
+): void {
+  // VS Code API doesn't provide token counts, so usage is reported as zeros
+  const usage = {
+    prompt_tokens: 0,
+    completion_tokens: 0,
+    total_tokens: 0
+  };
+  const choice: OpenAIChoice = {
+    index: 0,
+    message: createOpenAIMessage(data, requestBody),
+    finish_reason: data.finishReason,
+  };
+  const payload: OpenAIResponse = {
+    id: context.requestId,
+    object: 'chat.completion',
+    created: context.created,
+    model: context.modelName,
+    choices: [choice],
+    usage,
+  };
+  verbose(`Non-stream complete len=${data.content.length} tool_calls=${data.toolCalls.length}`);
+  writeJson(res, 200, payload);
+}
+
+/**
+ * Type guard for VS Code LanguageModelToolCallPart
+ */
+function isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart {
+  if (typeof part !== 'object' || part === null) {
+    return false;
+  }
+  // Duck-typed: a tool call part is any object exposing all three fields
+  return 'callId' in part && 'name' in part && 'input' in part;
+}
+
+/**
+ * Reports whether a stream part carries text: a raw string or an object with a `value` field
+ */
+function isTextPart(part: unknown): boolean {
+  const hasValueField = typeof part === 'object' && part !== null && 'value' in part;
+  return typeof part === 'string' || hasValueField;
+}
+
+/**
+ * Extracts text from a stream part; anything that is not a string yields ''
+ */
+function extractTextContent(part: unknown): string {
+  if (typeof part === 'string') {
+    return part;
+  }
+  if (part !== null && typeof part === 'object' && 'value' in part) {
+    // String(undefined) is the truthy text "undefined", so the old `|| ''` fallback never fired
+    const { value } = part as { value: unknown };
+    return typeof value === 'string' ? value : '';
+  }
+  return '';
+}
\ No newline at end of file
diff --git a/src/services/streaming-handler.ts b/src/services/streaming-handler.ts
new file mode 100644
index 0000000..a972027
--- /dev/null
+++ b/src/services/streaming-handler.ts
@@ -0,0 +1,190 @@
+import type * as vscode from 'vscode';
+import type { ServerResponse } from 'http';
+import type { 
+  OpenAIResponse, 
+  OpenAIToolCall, 
+  ChatCompletionContext 
+} from '../types/openai-types';
+import type { ChatCompletionRequest } from '../messages';
+import { verbose } from '../log';
+
+/**
+ * Handles Server-Sent Events streaming for OpenAI-compatible chat completions
+ */
+export class StreamingResponseHandler {
+  private readonly response: ServerResponse;
+  private readonly context: ChatCompletionContext;
+  private readonly requestBody?: ChatCompletionRequest;
+
+  constructor(
+    response: ServerResponse,
+    context: ChatCompletionContext,
+    requestBody?: ChatCompletionRequest
+  ) {
+    this.response = response;
+    this.context = context;
+    this.requestBody = requestBody;
+  }
+
+  /**
+   * Initializes the SSE stream with proper headers
+   */
+  public initializeStream(): void {
+    this.response.writeHead(200, {
+      'Content-Type': 'text/event-stream',
+      'Cache-Control': 'no-cache',
+      'Connection': 'keep-alive',
+    });
+
+    verbose(`SSE start id=${this.context.requestId}`);
+  }
+
+  /**
+   * Processes the Language Model response stream and sends SSE chunks
+   * @param languageModelResponse - VS Code Language Model response
+   */
+  public async processAndStreamResponse(
+    languageModelResponse: vscode.LanguageModelChatResponse
+  ): Promise<void> {
+    const toolCalls: OpenAIToolCall[] = [];
+
+    for await (const part of languageModelResponse.stream) {
+      if (this.isToolCallPart(part)) {
+        const toolCall = this.createToolCallFromPart(part);
+        toolCalls.push(toolCall);
+        this.sendToolCallChunk(toolCall);
+      } else if (this.isTextPart(part)) {
+        const content = this.extractTextContent(part);
+        if (content) {
+          this.sendContentChunk(content);
+        }
+      }
+    }
+
+    this.sendFinalChunk(toolCalls.length > 0 ? 'tool_calls' : 'stop');
+    this.endStream();
+  }
+
+  /**
+   * Sends a content delta chunk
+   */
+  private sendContentChunk(content: string): void {
+    const chunkResponse: OpenAIResponse = {
+      id: this.context.requestId,
+      object: 'chat.completion.chunk',
+      created: this.context.created,
+      model: this.context.modelName,
+      choices: [{
+        index: 0,
+        delta: { content },
+        finish_reason: null
+      }]
+    };
+
+    this.writeSSEData(chunkResponse);
+  }
+
+  /**
+   * Sends a tool call chunk
+   */
+  private sendToolCallChunk(toolCall: OpenAIToolCall): void {
+    const chunkResponse: OpenAIResponse = {
+      id: this.context.requestId,
+      object: 'chat.completion.chunk',
+      created: this.context.created,
+      model: this.context.modelName,
+      choices: [{
+        index: 0,
+        delta: {
+          tool_calls: [toolCall]
+        },
+        finish_reason: null
+      }]
+    };
+
+    this.writeSSEData(chunkResponse);
+  }
+
+  /**
+   * Sends the final completion chunk with finish reason
+   */
+  private sendFinalChunk(finishReason: 'stop' | 'tool_calls'): void {
+    const finalChunkResponse: OpenAIResponse = {
+      id: this.context.requestId,
+      object: 'chat.completion.chunk',
+      created: this.context.created,
+      model: this.context.modelName,
+      choices: [{
+        index: 0,
+        delta: {},
+        finish_reason: finishReason
+      }]
+    };
+
+    this.writeSSEData(finalChunkResponse);
+  }
+
+  /**
+   * Writes the `[DONE]` sentinel and closes the response
+   */
+  private endStream(): void {
+    verbose(`SSE end id=${this.context.requestId}`);
+    this.response.write('data: [DONE]\n\n');
+    this.response.end();
+  }
+
+  /**
+   * Writes one JSON payload as an SSE `data:` event
+   */
+  private writeSSEData(data: OpenAIResponse): void {
+    this.response.write(`data: ${JSON.stringify(data)}\n\n`);
+  }
+
+  /**
+   * Creates an OpenAI tool call from VS Code Language Model part
+   */
+  private createToolCallFromPart(part: vscode.LanguageModelToolCallPart): OpenAIToolCall {
+    return {
+      id: part.callId,
+      type: 'function',
+      function: {
+        name: part.name,
+        arguments: JSON.stringify(part.input)
+      }
+    };
+  }
+
+  /**
+   * Type guard for VS Code LanguageModelToolCallPart
+   */
+  private isToolCallPart(part: unknown): part is vscode.LanguageModelToolCallPart {
+    return part !== null &&
+           typeof part === 'object' &&
+           'callId' in part &&
+           'name' in part &&
+           'input' in part;
+  }
+
+  /**
+   * Reports whether a part carries text: a raw string or an object with a `value` field
+   */
+  private isTextPart(part: unknown): boolean {
+    return typeof part === 'string' ||
+           (part !== null && typeof part === 'object' && 'value' in part);
+  }
+
+  /**
+   * Extracts text from a stream part; anything that is not a string yields ''
+   */
+  private extractTextContent(part: unknown): string {
+    if (typeof part === 'string') {
+      return part;
+    }
+    if (part !== null && typeof part === 'object' && 'value' in part) {
+      // String(undefined) is the truthy text "undefined", so the old `|| ''` fallback never fired
+      const { value } = part as { value: unknown };
+      return typeof value === 'string' ? value : '';
+    }
+    return '';
+  }
+}
\ No newline at end of file
diff --git a/src/types/openai-types.ts b/src/types/openai-types.ts
new file mode 100644
index 0000000..130e406
--- /dev/null
+++ b/src/types/openai-types.ts
@@ -0,0 +1,81 @@
+import type * as vscode from 'vscode';
+import type { Tool } from '../messages';
+
+/**
+ * OpenAI API compatible types for request and response handling
+ */
+
+export interface OpenAIToolCall { // One tool invocation in OpenAI wire format
+  readonly id: string; // filled from the Language Model part's callId
+  readonly type: 'function';
+  readonly function: {
+    readonly name: string; // filled from the Language Model part's name
+    readonly arguments: string; // JSON.stringify of the Language Model part's input
+  };
+}
+
+export interface OpenAIMessage { // Assistant message placed in a choice's `message` or (partially) `delta`
+  readonly role: 'assistant';
+  readonly content: string | null; // createOpenAIMessage sets null whenever tool calls are present
+  readonly tool_calls?: OpenAIToolCall[];
+  readonly function_call?: { // legacy mirror: only for exactly one tool call when the request set function_call
+    readonly name: string;
+    readonly arguments: string;
+  };
+}
+
+export interface OpenAIChoice { // Single entry of a response's `choices` array
+  readonly index: number;
+  readonly message?: OpenAIMessage; // set by sendCompletionResponse (non-streaming)
+  readonly delta?: Partial<OpenAIMessage>; // set by StreamingResponseHandler chunks
+  readonly finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null; // null on non-final stream chunks
+}
+
+export interface OpenAIResponse { // Envelope shared by full completions and streamed chunks
+  readonly id: string; // taken from ChatCompletionContext.requestId
+  readonly object: 'chat.completion' | 'chat.completion.chunk';
+  readonly created: number; // taken from ChatCompletionContext.created
+  readonly model: string; // taken from ChatCompletionContext.modelName
+  readonly choices: OpenAIChoice[];
+  readonly usage?: { // sendCompletionResponse reports zeros; stream chunks omit it
+    readonly prompt_tokens: number;
+    readonly completion_tokens: number;
+    readonly total_tokens: number;
+  };
+}
+
+export interface ChatCompletionContext { // Per-request values built by ModelService.createChatCompletionContext
+  readonly requestId: string; // 'chatcmpl-' followed by a random base-36 suffix
+  readonly modelName: string; // request's model, or 'copilot' when none was given
+  readonly created: number; // Unix time in seconds
+  readonly hasTools: boolean;
+  readonly isStreaming: boolean; // true unless the request sets stream to false
+}
+
+export interface ProcessedResponseData { // Result of processLanguageModelResponse
+  readonly content: string; // all text parts of the stream, concatenated in order
+  readonly toolCalls: OpenAIToolCall[]; // one entry per tool call part, in stream order
+  readonly finishReason: OpenAIChoice['finish_reason']; // 'tool_calls' if any tool call was seen, else 'stop'
+}
+
+/**
+ * Outcome of a model availability check; validateModel fills the error fields only when isValid is false
+ */
+export interface ModelValidationResult {
+  readonly isValid: boolean;
+  readonly statusCode?: number; // validateModel uses 404 (model not found) or 503 (Copilot unavailable)
+  readonly errorType?: string;
+  readonly errorCode?: string;
+  readonly reason?: string;
+}
+
+/**
+ * Everything createProcessingContext prepares for one Language Model sendRequest call
+ */
+export interface RequestProcessingContext {
+  readonly model: vscode.LanguageModelChat; // resolved via getModel for the request's model
+  readonly lmMessages: vscode.LanguageModelChatMessage[]; // output of normalizeMessagesLM with config.historyWindow
+  readonly lmTools: vscode.LanguageModelChatTool[]; // mergedTools run through convertOpenAIToolsToLM
+  readonly requestOptions: vscode.LanguageModelChatRequestOptions; // includes tools only when lmTools is non-empty
+  readonly mergedTools: Tool[]; // request tools plus converted deprecated functions
+}
\ No newline at end of file
diff --git a/tsconfig.json b/tsconfig.json
index a0001ce..fb978bd 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -9,6 +9,7 @@
     "sourceMap": true,
     "esModuleInterop": true,
     "allowSyntheticDefaultImports": true,
+    "forceConsistentCasingInFileNames": true,
     "types": ["node", "vscode"]
   },
   "include": ["src/**/*.ts"]