From 57163fb0ad5eb9ac674167bfa45c83acfce2c695 Mon Sep 17 00:00:00 2001
From: Vinay Chauhan <vinaychauhannumber@users.noreply.github.com>
Date: Wed, 1 Jul 2026 22:08:05 +0530
Subject: [PATCH 1/2] feat(llm): add configurable thinkTagName for thinking
 output formats (#5992)

Different LLM providers use different XML tag names for reasoning/thinking
output. Previously, Continue hardcoded '<think>...</think>' everywhere,
which broke support for providers like vLLM that use custom tag formats.

Changes:
- Add `thinkTagName` option to `LLMOptions` interface (defaults to 'think')
- Wire `thinkTagName` through the BaseLLM constructor with a default of 'think',
  ensuring full backward compatibility
- Update Ollama streaming chat handler to use configurable open/close tags
  instead of hardcoded '<think>' / '</think>'
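- Update `removeCodeBlocksAndTrim` in core/util to accept an optional
  `thinkTagName` param (default 'think') and build the regex dynamically.
  The tag name is interpolated into the RegExp unescaped, so it must not
  contain regex metacharacters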
- Update autocomplete postprocessing (Qwen3 block) to use `llm.thinkTagName`
  instead of hardcoded '<think>' regex

Usage example in config:
  {
    "provider": "openai-compatible",
    "model": "my-vllm-model",
    "apiBase": "http://localhost:8000/v1",
    "thinkTagName": "reasoning"  // strips <reasoning>...</reasoning>
  }

Fixes #5992
Ref: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
---
 core/autocomplete/postprocessing/index.ts | 13 ++++++++++---
 core/config/types.ts                      |  9 +++++++++
 core/llm/index.ts                         | 10 ++++++++++
 core/llm/llms/Ollama.ts                   |  6 ++++--
 core/util/index.ts                        | 19 +++++++++++++++----
 5 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/core/autocomplete/postprocessing/index.ts b/core/autocomplete/postprocessing/index.ts
index 6ce3742d6a2..329e58ca07c 100644
--- a/core/autocomplete/postprocessing/index.ts
+++ b/core/autocomplete/postprocessing/index.ts
@@ -142,9 +142,16 @@ export function postprocessCompletion({
 
   if (llm.model.includes("qwen3")) {
     // Qwen3 always starts from special thinking markers, and we don't want them to output these contents
-    // Remove all content from "
-    completion = completion.replace(/<think>.*?<\/think>/s, "");
-    completion = completion.replace(/<\/think>/, "");
+    // Remove all content within thinking tags. Use the configurable thinkTagName so custom
+    // provider formats (e.g. vLLM reasoning output tags) are also handled correctly.
+    const thinkTagName = llm.thinkTagName;
+    const thinkBlockRegex = new RegExp(
+      `<${thinkTagName}>.*?<\\/${thinkTagName}>`,
+      "s",
+    );
+    const thinkCloseTagRegex = new RegExp(`<\\/${thinkTagName}>`);
+    completion = completion.replace(thinkBlockRegex, "");
+    completion = completion.replace(thinkCloseTagRegex, "");
 
     // Remove any number of newline characters at the beginning and end
     completion = completion.replace(/^\n+|\n+$/g, "");
diff --git a/core/config/types.ts b/core/config/types.ts
index 2500042e887..a51f75bae39 100644
--- a/core/config/types.ts
+++ b/core/config/types.ts
@@ -572,6 +572,15 @@ declare global {
   
     // IBM watsonx Options
     deploymentId?: string;
+
+    /**
+     * The XML tag name used by the LLM provider for thinking/reasoning output.
+     * Different providers (e.g. vLLM, Ollama) may use different tag names.
+     * Defaults to "think", which matches <think>...</think> blocks.
+     * Set this to match your provider's reasoning output format.
+     * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
+     */
+    thinkTagName?: string;
   }
   
   type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<
diff --git a/core/llm/index.ts b/core/llm/index.ts
index 1af44b25614..5806064f889 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -184,6 +184,13 @@ export abstract class BaseLLM implements ILLM {
   // For IBM watsonx
   deploymentId?: string;
 
+  /**
+   * The XML tag name used for thinking/reasoning output.
+   * Defaults to "think" (<think>...</think>).
+   * Override via config to match your provider (e.g. vLLM custom reasoning tags).
+   */
+  thinkTagName: string;
+
   // Embedding options
   embeddingId: string;
   maxEmbeddingChunkSize: number;
@@ -272,6 +279,9 @@ export abstract class BaseLLM implements ILLM {
     // watsonx deploymentId
     this.deploymentId = options.deploymentId;
 
+    // Thinking/reasoning output tag name (configurable for providers like vLLM)
+    this.thinkTagName = options.thinkTagName ?? "think";
+
     if (this.apiBase && !this.apiBase.endsWith("/")) {
       this.apiBase = `${this.apiBase}/`;
     }
diff --git a/core/llm/llms/Ollama.ts b/core/llm/llms/Ollama.ts
index 4bcd9fb1e0f..1f3fd1f9066 100644
--- a/core/llm/llms/Ollama.ts
+++ b/core/llm/llms/Ollama.ts
@@ -535,6 +535,8 @@ class Ollama extends BaseLLM implements ModelInstaller {
       signal,
     });
     let isThinking: boolean = false;
+    const thinkOpenTag = `<${this.thinkTagName}>`;
+    const thinkCloseTag = `</${this.thinkTagName}>`;
 
     function convertChatMessage(res: OllamaChatResponse): ChatMessage[] {
       if ("error" in res) {
@@ -544,7 +546,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       if ("type" in res) {
         const { content } = res;
 
-        if (content === "<think>") {
+        if (content === thinkOpenTag) {
           isThinking = true;
         }
 
@@ -557,7 +559,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
 
           if (thinkingMessage) {
             // could cause issues with termination if chunk doesn't match this exactly
-            if (content === "</think>") {
+            if (content === thinkCloseTag) {
               isThinking = false;
             }
             // When Streaming you can't have both thinking and content
diff --git a/core/util/index.ts b/core/util/index.ts
index 76b319bddf0..c8e7aad728a 100644
--- a/core/util/index.ts
+++ b/core/util/index.ts
@@ -191,13 +191,24 @@ export function dedent(strings: TemplateStringsArray, ...values: any[]) {
 }
 
 /**
- * Removes code blocks from a message.
+ * Removes code blocks and thinking blocks from a message.
  *
- * Return modified message text.
+ * @param text - The message text to process.
+ * @param thinkTagName - The XML tag name used for thinking output (default: "think").
+ *   Different LLM providers may use different tag names for reasoning output.
+ *   Set this to match your provider's format (e.g. vLLM custom reasoning tags).
+ * @returns Modified message text with code blocks and think blocks removed.
  */
-export function removeCodeBlocksAndTrim(text: string): string {
+export function removeCodeBlocksAndTrim(
+  text: string,
+  thinkTagName: string = "think",
+): string {
   const codeBlockRegex = /```[\s\S]*?```/g;
-  const thinkBlockRegex = /<think>[\s\S]*?<\/think>/g;
+  // Build regex dynamically based on the configured tag name
+  const thinkBlockRegex = new RegExp(
+    `<${thinkTagName}>[\\s\\S]*?<\\/${thinkTagName}>`,
+    "g",
+  );
 
   // Remove code blocks and think blocks from the message text
   let processedText = text.replace(codeBlockRegex, "");

From f23b9e511687556cf07a86d88adc95684c1180d3 Mon Sep 17 00:00:00 2001
From: Vinay Chauhan <vinaychauhannumber@users.noreply.github.com>
Date: Thu, 2 Jul 2026 04:47:02 +0530
Subject: [PATCH 2/2] fix(types): add thinkTagName to ILLM interface and
 index.d.ts LLMOptions

The previous commit added thinkTagName to LLMOptions and BaseLLM but missed:
1. The ILLM interface in core/config/types.ts. Code that accesses the model
   through the ILLM type only sees members declared on that interface, so a
   field added solely to the implementing BaseLLM class is not visible to it
2. The public LLMOptions in core/index.d.ts used by the VSIX build pipeline

This caused CI failures:
- 'Property thinkTagName does not exist on type ILLM'
- 'Property thinkTagName does not exist on type { model: string; title: ... }'

Fixes both by adding thinkTagName to all relevant type definitions.
---
 core/config/types.ts | 7 +++++++
 core/index.d.ts      | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/core/config/types.ts b/core/config/types.ts
index a51f75bae39..38174724cb4 100644
--- a/core/config/types.ts
+++ b/core/config/types.ts
@@ -102,6 +102,13 @@ declare global {
     apiType?: string;
     region?: string;
     projectId?: string;
+
+    /**
+     * The XML tag name used for thinking/reasoning output.
+     * Defaults to "think" (<think>...</think>).
+     * Configure this to match your provider's format (e.g. vLLM custom reasoning tags).
+     */
+    thinkTagName: string;
   
     // Embedding options
     embeddingId: string;
diff --git a/core/index.d.ts b/core/index.d.ts
index bec3e0e0ff8..912dbdb3d7e 100644
--- a/core/index.d.ts
+++ b/core/index.d.ts
@@ -713,6 +713,14 @@ export interface LLMOptions {
 
   /** Tool overrides for this model */
   toolOverrides?: ToolOverride[];
+
+  /**
+   * The XML tag name used by the LLM provider for thinking/reasoning output.
+   * Defaults to "think", which matches <think>...</think> blocks.
+   * Configure this to match your provider's reasoning output format.
+   * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
+   */
+  thinkTagName?: string;
 }
 
 type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<