continuedev · vinaychauhannumber · Jul 1, 2026 · Jul 1, 2026 · cubic-dev-ai · Jul 1, 2026
@@ -142,9 +142,16 @@ export function postprocessCompletion({
 
   if (llm.model.includes("qwen3")) {
     // Qwen3 always starts from special thinking markers, and we don't want them to output these contents
-    // Remove all content from "
-    completion = completion.replace(/<think>.*?<\/think>/s, "");
-    completion = completion.replace(/<\/think>/, "");
+    // Strip the first thinking block (tags included) and one leftover closing tag. The tag name
+    // comes from the configurable thinkTagName (e.g. vLLM custom tags) and is not regex-escaped.
+    const thinkTagName = llm.thinkTagName;
+    const thinkBlockRegex = new RegExp(
+      `<${thinkTagName}>.*?<\\/${thinkTagName}>`,
+      "s",
+    );
+    const thinkCloseTagRegex = new RegExp(`<\\/${thinkTagName}>`);
+    completion = completion.replace(thinkBlockRegex, "");
+    completion = completion.replace(thinkCloseTagRegex, "");
 
     // Remove any number of newline characters at the beginning and end
     completion = completion.replace(/^\n+|\n+$/g, "");

@@ -102,6 +102,13 @@ declare global {
     apiType?: string;
     region?: string;
     projectId?: string;
+
+    /**
+     * The XML tag name used for thinking/reasoning output.
+     * Defaults to "think" (<think>...</think>).
+     * Configure this to match your provider's format (e.g. vLLM custom reasoning tags).
+     */
+    thinkTagName: string;
 
     // Embedding options
     embeddingId: string;
@@ -572,6 +579,15 @@ declare global {
 
     // IBM watsonx Options
     deploymentId?: string;
+
+    /**
+     * The XML tag name used by the LLM provider for thinking/reasoning output.
+     * Different providers (e.g. vLLM, Ollama) may use different tag names.
+     * Defaults to "think", which produces <think>...</think> blocks.
+     * Set this to match your provider's reasoning output format.
+     * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
+     */
+    thinkTagName?: string;
   }
 
   type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<

@@ -713,6 +713,14 @@ export interface LLMOptions {
 
   /** Tool overrides for this model */
   toolOverrides?: ToolOverride[];
+
+  /**
+   * The XML tag name used by the LLM provider for thinking/reasoning output.
+   * Defaults to "think", which produces <think>...</think> blocks.
+   * Configure this to match your provider's reasoning output format.
+   * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html
+   */
+  thinkTagName?: string;
 }
 
 type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<

@@ -184,6 +184,13 @@ export abstract class BaseLLM implements ILLM {
   // For IBM watsonx
   deploymentId?: string;
 
+  /**
+   * The XML tag name used for thinking/reasoning output.
+   * Defaults to "think" (<think>...</think>).
+   * Override via config to match your provider (e.g. vLLM custom reasoning tags).
+   */
+  thinkTagName: string;
+
   // Embedding options
   embeddingId: string;
   maxEmbeddingChunkSize: number;
@@ -272,6 +279,9 @@ export abstract class BaseLLM implements ILLM {
     // watsonx deploymentId
     this.deploymentId = options.deploymentId;
 
+    // Thinking/reasoning output tag name (configurable for providers like vLLM)
+    this.thinkTagName = options.thinkTagName ?? "think";
+
     if (this.apiBase && !this.apiBase.endsWith("/")) {
       this.apiBase = `${this.apiBase}/`;
     }

@@ -535,6 +535,8 @@ class Ollama extends BaseLLM implements ModelInstaller {
       signal,
     });
     let isThinking: boolean = false;
+    const thinkOpenTag = `<${this.thinkTagName}>`;
+    const thinkCloseTag = `</${this.thinkTagName}>`;
 
     function convertChatMessage(res: OllamaChatResponse): ChatMessage[] {
       if ("error" in res) {
@@ -544,7 +546,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
       if ("type" in res) {
         const { content } = res;
 
-        if (content === "<think>") {
+        if (content === thinkOpenTag) {
           isThinking = true;
         }
 
@@ -557,7 +559,7 @@ class Ollama extends BaseLLM implements ModelInstaller {
 
           if (thinkingMessage) {
             // could cause issues with termination if chunk doesn't match this exactly
-            if (content === "</think>") {
+            if (content === thinkCloseTag) {
               isThinking = false;
             }
             // When Streaming you can't have both thinking and content

@@ -191,13 +191,24 @@ export function dedent(strings: TemplateStringsArray, ...values: any[]) {
 }
 
 /**
- * Removes code blocks from a message.
+ * Removes code blocks and thinking blocks from a message.
  *
- * Return modified message text.
+ * @param text - The message text to process.
+ * @param thinkTagName - The XML tag name used for thinking output (default: "think").
+ *   Different LLM providers may use different tag names for reasoning output.
+ *   Set this to match your provider's format (e.g. vLLM custom reasoning tags).
+ * @returns Modified message text with code blocks and think blocks removed.
  */
-export function removeCodeBlocksAndTrim(text: string): string {
+export function removeCodeBlocksAndTrim(
+  text: string,
+  thinkTagName: string = "think",
+): string {
   const codeBlockRegex = /```[\s\S]*?```/g;
-  const thinkBlockRegex = /<think>[\s\S]*?<\/think>/g;
+  // Build the regex from the configured tag name; it is inserted as-is (no regex escaping)
+  const thinkBlockRegex = new RegExp(
+    `<${thinkTagName}>[\\s\\S]*?<\\/${thinkTagName}>`,
+    "g",
+  );
 
   // Remove code blocks and think blocks from the message text
   let processedText = text.replace(codeBlockRegex, "");