diff --git a/core/autocomplete/postprocessing/index.ts b/core/autocomplete/postprocessing/index.ts index 6ce3742d6a2..329e58ca07c 100644 --- a/core/autocomplete/postprocessing/index.ts +++ b/core/autocomplete/postprocessing/index.ts @@ -142,9 +142,16 @@ export function postprocessCompletion({ if (llm.model.includes("qwen3")) { // Qwen3 always starts from special thinking markers, and we don't want them to output these contents - // Remove all content from " - completion = completion.replace(/.*?<\/think>/s, ""); - completion = completion.replace(/<\/think>/, ""); + // Remove all content within thinking tags. Use the configurable thinkTagName so custom + // provider formats (e.g. vLLM reasoning output tags) are also handled correctly. + const thinkTagName = llm.thinkTagName; + const thinkBlockRegex = new RegExp( + `<${thinkTagName}>.*?<\\/${thinkTagName}>`, + "s", + ); + const thinkCloseTagRegex = new RegExp(`<\\/${thinkTagName}>`); + completion = completion.replace(thinkBlockRegex, ""); + completion = completion.replace(thinkCloseTagRegex, ""); // Remove any number of newline characters at the beginning and end completion = completion.replace(/^\n+|\n+$/g, ""); diff --git a/core/config/types.ts b/core/config/types.ts index 2500042e887..38174724cb4 100644 --- a/core/config/types.ts +++ b/core/config/types.ts @@ -102,6 +102,13 @@ declare global { apiType?: string; region?: string; projectId?: string; + + /** + * The XML tag name used for thinking/reasoning output. + * Defaults to "think" (<think>...</think>). + * Configure this to match your provider's format (e.g. vLLM custom reasoning tags). + */ + thinkTagName: string; // Embedding options embeddingId: string; @@ -572,6 +579,15 @@ declare global { // IBM watsonx Options deploymentId?: string; + + /** + * The XML tag name used by the LLM provider for thinking/reasoning output. + * Different providers (e.g. vLLM, Ollama) may use different tag names. + * Defaults to "think", which produces <think>...</think> blocks.
+ * Set this to match your provider's reasoning output format. + * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html + */ + thinkTagName?: string; } type RequireAtLeastOne = Pick< diff --git a/core/index.d.ts b/core/index.d.ts index bec3e0e0ff8..912dbdb3d7e 100644 --- a/core/index.d.ts +++ b/core/index.d.ts @@ -713,6 +713,14 @@ export interface LLMOptions { /** Tool overrides for this model */ toolOverrides?: ToolOverride[]; + + /** + * The XML tag name used by the LLM provider for thinking/reasoning output. + * Defaults to "think", which produces <think>...</think> blocks. + * Configure this to match your provider's reasoning output format. + * See: https://docs.vllm.ai/en/latest/features/reasoning_outputs.html + */ + thinkTagName?: string; } type RequireAtLeastOne = Pick< diff --git a/core/llm/index.ts b/core/llm/index.ts index 1af44b25614..5806064f889 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -184,6 +184,13 @@ export abstract class BaseLLM implements ILLM { // For IBM watsonx deploymentId?: string; + /** + * The XML tag name used for thinking/reasoning output. + * Defaults to "think" (<think>...</think>). + * Override via config to match your provider (e.g. vLLM custom reasoning tags). + */ + thinkTagName: string; + // Embedding options embeddingId: string; maxEmbeddingChunkSize: number; @@ -272,6 +279,9 @@ export abstract class BaseLLM implements ILLM { // watsonx deploymentId this.deploymentId = options.deploymentId; + // Thinking/reasoning output tag name (configurable for providers like vLLM) + this.thinkTagName = options.thinkTagName ?? 
"think"; + if (this.apiBase && !this.apiBase.endsWith("/")) { this.apiBase = `${this.apiBase}/`; } diff --git a/core/llm/llms/Ollama.ts b/core/llm/llms/Ollama.ts index 4bcd9fb1e0f..1f3fd1f9066 100644 --- a/core/llm/llms/Ollama.ts +++ b/core/llm/llms/Ollama.ts @@ -535,6 +535,8 @@ class Ollama extends BaseLLM implements ModelInstaller { signal, }); let isThinking: boolean = false; + const thinkOpenTag = `<${this.thinkTagName}>`; + const thinkCloseTag = ``; function convertChatMessage(res: OllamaChatResponse): ChatMessage[] { if ("error" in res) { @@ -544,7 +546,7 @@ class Ollama extends BaseLLM implements ModelInstaller { if ("type" in res) { const { content } = res; - if (content === "") { + if (content === thinkOpenTag) { isThinking = true; } @@ -557,7 +559,7 @@ class Ollama extends BaseLLM implements ModelInstaller { if (thinkingMessage) { // could cause issues with termination if chunk doesn't match this exactly - if (content === "") { + if (content === thinkCloseTag) { isThinking = false; } // When Streaming you can't have both thinking and content diff --git a/core/util/index.ts b/core/util/index.ts index 76b319bddf0..c8e7aad728a 100644 --- a/core/util/index.ts +++ b/core/util/index.ts @@ -191,13 +191,24 @@ export function dedent(strings: TemplateStringsArray, ...values: any[]) { } /** - * Removes code blocks from a message. + * Removes code blocks and thinking blocks from a message. * - * Return modified message text. + * @param text - The message text to process. + * @param thinkTagName - The XML tag name used for thinking output (default: "think"). + * Different LLM providers may use different tag names for reasoning output. + * Set this to match your provider's format (e.g. vLLM custom reasoning tags). + * @returns Modified message text with code blocks and think blocks removed. 
*/ -export function removeCodeBlocksAndTrim(text: string): string { +export function removeCodeBlocksAndTrim( + text: string, + thinkTagName: string = "think", +): string { const codeBlockRegex = /```[\s\S]*?```/g; - const thinkBlockRegex = /[\s\S]*?<\/think>/g; + // Build regex dynamically based on the configured tag name + const thinkBlockRegex = new RegExp( + `<${thinkTagName}>[\\s\\S]*?<\\/${thinkTagName}>`, + "g", + ); // Remove code blocks and think blocks from the message text let processedText = text.replace(codeBlockRegex, "");