From 57163fb0ad5eb9ac674167bfa45c83acfce2c695 Mon Sep 17 00:00:00 2001 From: Vinay Chauhan Date: Wed, 1 Jul 2026 22:08:05 +0530 Subject: [PATCH 1/2] feat(llm): add configurable thinkTagName for thinking output formats (#5992) Different LLM providers use different XML tag names for reasoning/thinking output. Previously, Continue hardcoded '<think>...</think>' everywhere, which broke support for providers like vLLM that use custom tag formats. Changes: - Add `thinkTagName` option to `LLMOptions` interface (defaults to 'think') - Wire `thinkTagName` through the BaseLLM constructor with a default of 'think', ensuring full backward compatibility - Update Ollama streaming chat handler to use configurable open/close tags instead of hardcoded '<think>' / '</think>' - Update `removeCodeBlocksAndTrim` in core/util to accept an optional `thinkTagName` param and build the regex dynamically - Update autocomplete postprocessing (Qwen3 block) to use `llm.thinkTagName` instead of hardcoded '<think>' regex Usage example in config: { "provider": "openai-compatible", "model": "my-vllm-model", "apiBase": "http://localhost:8000/v1", "thinkTagName": "reasoning" // strips <reasoning>...</reasoning>
} Fixes #5992 Ref: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html --- core/autocomplete/postprocessing/index.ts | 13 ++++++++++--- core/config/types.ts | 9 +++++++++ core/llm/index.ts | 10 ++++++++++ core/llm/llms/Ollama.ts | 6 ++++-- core/util/index.ts | 19 +++++++++++++++---- 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/core/autocomplete/postprocessing/index.ts b/core/autocomplete/postprocessing/index.ts index 6ce3742d6a2..329e58ca07c 100644 --- a/core/autocomplete/postprocessing/index.ts +++ b/core/autocomplete/postprocessing/index.ts @@ -142,9 +142,16 @@ export function postprocessCompletion({ if (llm.model.includes("qwen3")) { // Qwen3 always starts from special thinking markers, and we don't want them to output these contents - // Remove all content from " - completion = completion.replace(/.*?<\/think>/s, ""); - completion = completion.replace(/<\/think>/, ""); + // Remove all content within thinking tags. Use the configurable thinkTagName so custom + // provider formats (e.g. vLLM reasoning output tags) are also handled correctly. + const thinkTagName = llm.thinkTagName; + const thinkBlockRegex = new RegExp( + `<${thinkTagName}>.*?<\\/${thinkTagName}>`, + "s", + ); + const thinkCloseTagRegex = new RegExp(`<\\/${thinkTagName}>`); + completion = completion.replace(thinkBlockRegex, ""); + completion = completion.replace(thinkCloseTagRegex, ""); // Remove any number of newline characters at the beginning and end completion = completion.replace(/^\n+|\n+$/g, ""); diff --git a/core/config/types.ts b/core/config/types.ts index 2500042e887..a51f75bae39 100644 --- a/core/config/types.ts +++ b/core/config/types.ts @@ -572,6 +572,15 @@ declare global { // IBM watsonx Options deploymentId?: string; + + /** + * The XML tag name used by the LLM provider for thinking/reasoning output. + * Different providers (e.g. vLLM, Ollama) may use different tag names. + * Defaults to "think", which produces <think>...</think> blocks.
+ * Set this to match your provider's reasoning output format. + * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html + */ + thinkTagName?: string; } type RequireAtLeastOne = Pick< diff --git a/core/llm/index.ts b/core/llm/index.ts index 1af44b25614..5806064f889 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -184,6 +184,13 @@ export abstract class BaseLLM implements ILLM { // For IBM watsonx deploymentId?: string; + /** + * The XML tag name used for thinking/reasoning output. + * Defaults to "think" (<think>...</think>). + * Override via config to match your provider (e.g. vLLM custom reasoning tags). + */ + thinkTagName: string; + // Embedding options embeddingId: string; maxEmbeddingChunkSize: number; @@ -272,6 +279,9 @@ export abstract class BaseLLM implements ILLM { // watsonx deploymentId this.deploymentId = options.deploymentId; + // Thinking/reasoning output tag name (configurable for providers like vLLM) + this.thinkTagName = options.thinkTagName ?? "think"; + if (this.apiBase && !this.apiBase.endsWith("/")) { this.apiBase = `${this.apiBase}/`; } diff --git a/core/llm/llms/Ollama.ts b/core/llm/llms/Ollama.ts index 4bcd9fb1e0f..1f3fd1f9066 100644 --- a/core/llm/llms/Ollama.ts +++ b/core/llm/llms/Ollama.ts @@ -535,6 +535,8 @@ class Ollama extends BaseLLM implements ModelInstaller { signal, }); let isThinking: boolean = false; + const thinkOpenTag = `<${this.thinkTagName}>`; + const thinkCloseTag = ``; function convertChatMessage(res: OllamaChatResponse): ChatMessage[] { if ("error" in res) { @@ -544,7 +546,7 @@ class Ollama extends BaseLLM implements ModelInstaller { if ("type" in res) { const { content } = res; - if (content === "") { + if (content === thinkOpenTag) { isThinking = true; } @@ -557,7 +559,7 @@ class Ollama extends BaseLLM implements ModelInstaller { if (thinkingMessage) { // could cause issues with termination if chunk doesn't match this exactly - if (content === "") { + if (content === thinkCloseTag) { isThinking = false; } // 
When Streaming you can't have both thinking and content diff --git a/core/util/index.ts b/core/util/index.ts index 76b319bddf0..c8e7aad728a 100644 --- a/core/util/index.ts +++ b/core/util/index.ts @@ -191,13 +191,24 @@ export function dedent(strings: TemplateStringsArray, ...values: any[]) { } /** - * Removes code blocks from a message. + * Removes code blocks and thinking blocks from a message. * - * Return modified message text. + * @param text - The message text to process. + * @param thinkTagName - The XML tag name used for thinking output (default: "think"). + * Different LLM providers may use different tag names for reasoning output. + * Set this to match your provider's format (e.g. vLLM custom reasoning tags). + * @returns Modified message text with code blocks and think blocks removed. */ -export function removeCodeBlocksAndTrim(text: string): string { +export function removeCodeBlocksAndTrim( + text: string, + thinkTagName: string = "think", +): string { const codeBlockRegex = /```[\s\S]*?```/g; - const thinkBlockRegex = /[\s\S]*?<\/think>/g; + // Build regex dynamically based on the configured tag name + const thinkBlockRegex = new RegExp( + `<${thinkTagName}>[\\s\\S]*?<\\/${thinkTagName}>`, + "g", + ); // Remove code blocks and think blocks from the message text let processedText = text.replace(codeBlockRegex, ""); From f23b9e511687556cf07a86d88adc95684c1180d3 Mon Sep 17 00:00:00 2001 From: Vinay Chauhan Date: Thu, 2 Jul 2026 04:47:02 +0530 Subject: [PATCH 2/2] fix(types): add thinkTagName to ILLM interface and index.d.ts LLMOptions The previous commit added thinkTagName to LLMOptions and BaseLLM but missed: 1. The ILLM interface in core/config/types.ts which explicitly declares properties (TypeScript doesn't infer them from extends alone for build checks) 2. 
The public LLMOptions in core/index.d.ts used by the VSIX build pipeline This caused CI failures: - 'Property thinkTagName does not exist on type ILLM' - 'Property thinkTagName does not exist on type { model: string; title: ... }' Fixes both by adding thinkTagName to all relevant type definitions. --- core/config/types.ts | 7 +++++++ core/index.d.ts | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/core/config/types.ts b/core/config/types.ts index a51f75bae39..38174724cb4 100644 --- a/core/config/types.ts +++ b/core/config/types.ts @@ -102,6 +102,13 @@ declare global { apiType?: string; region?: string; projectId?: string; + + /** + * The XML tag name used for thinking/reasoning output. + * Defaults to "think" (<think>...</think>). + * Configure this to match your provider's format (e.g. vLLM custom reasoning tags). + */ + thinkTagName: string; // Embedding options embeddingId: string; diff --git a/core/index.d.ts b/core/index.d.ts index bec3e0e0ff8..912dbdb3d7e 100644 --- a/core/index.d.ts +++ b/core/index.d.ts @@ -713,6 +713,14 @@ export interface LLMOptions { /** Tool overrides for this model */ toolOverrides?: ToolOverride[]; + + /** + * The XML tag name used by the LLM provider for thinking/reasoning output. + * Defaults to "think", which produces <think>...</think> blocks. + * Configure this to match your provider's reasoning output format. + * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html + */ + thinkTagName?: string; } type RequireAtLeastOne = Pick<