From df354b33d08d00772d181c228e8b64fe937cc4cc Mon Sep 17 00:00:00 2001
From: Harsh16gupta <harsh16official@gmail.com>
Date: Sat, 4 Jul 2026 16:50:12 +0530
Subject: [PATCH 1/2] feat: auto-name clusters using TF-IDF taxonomy with
 inline rename UI

---
 package.json                            |   2 +-
 src/commands/testEmbed.ts               |  52 ++-
 src/pipeline/UmapProjector.ts           |  51 ++-
 src/pipeline/clustering/benchmark.ts    |  20 +-
 src/pipeline/clustering/postProcess.ts  | 499 ++++++++++++++++++++++--
 src/pipeline/nativeEmbeddingPipeline.ts |  71 ++++
 src/pipeline/pipelineConfig.ts          |  23 ++
 src/pipeline/runPipeline.ts             | 115 ++++--
 src/types/cluster.ts                    |   2 +
 src/webview/components/ClusterCard.tsx  |  70 +++-
 src/webview/context/AppStateContext.tsx |  16 +
 src/webview/pages/DashboardPage.tsx     |   6 +-
 src/webview/panel.css                   |  43 ++
 src/worker/embedWorker.ts               |  59 ++-
 14 files changed, 903 insertions(+), 126 deletions(-)
 create mode 100644 src/pipeline/nativeEmbeddingPipeline.ts
 create mode 100644 src/pipeline/pipelineConfig.ts

diff --git a/package.json b/package.json
index 72c705f..1805a32 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "joplin-note-categorization-plugin",
+  "name": "joplin-plugin-note-categorization",
   "version": "1.0.0",
   "scripts": {
     "dist": "webpack --env joplin-plugin-config=buildMain && webpack --env joplin-plugin-config=buildExtraScripts && npm run copyAssets && webpack --env joplin-plugin-config=createArchive",
diff --git a/src/commands/testEmbed.ts b/src/commands/testEmbed.ts
index 28b1fba..7e9265f 100644
--- a/src/commands/testEmbed.ts
+++ b/src/commands/testEmbed.ts
@@ -1,6 +1,5 @@
 import { fetchAllNotes } from '../pipeline/noteReader';
 import { benchmark } from '../pipeline/clustering/benchmark';
-import { CategorizationConfig } from '../types/cluster';
 import { averageVectors, blendVectors, computeTitleWeight, cosineSimilarity } from '../pipeline/vectorAggregator';
 import { NoteVector, WorkerMessage } from '../types/embed';
 import { isGenericTitle } from '../utils/titleFilter';
@@ -8,6 +7,7 @@ import { log, logErr } from '../utils/logger';
 import { getEncoding } from 'js-tiktoken';
 import { VectorCache } from '../pipeline/vectorCache';
 import { enrichResultsWithTags } from '../pipeline/clustering/postProcess';
+import { DEFAULT_CONFIG, isValidEmbeddingVector } from '../pipeline/pipelineConfig';
 
 // We use cl100k_base to approximate token counts for chunking.
 // The embedding model (all-MiniLM-L6-v2) uses a WordPiece tokenizer with a
@@ -117,16 +117,22 @@ export const runTestEmbed = async (installDir: string) => {
 			const cachedItem = await cache.getItem(note.id);
 
 			if (cachedItem && cachedItem.metadata.hash === currentNoteHash) {
-				log(`[${currentNoteIndex + 1}/${notes.length}] cache hit for "${note.title.slice(0, 30)}"`);
-				noteVectors.push({
-					noteId: note.id,
-					title: note.title,
-					vector: cachedItem.vector,
-					titleWeight: cachedItem.metadata.titleWeight ?? 0,
-				});
-				cachedCount++;
-				currentNoteIndex++;
-				continue;
+				if (isValidEmbeddingVector(cachedItem.vector)) {
+					log(`[${currentNoteIndex + 1}/${notes.length}] cache hit for "${note.title.slice(0, 30)}"`);
+					noteVectors.push({
+						noteId: note.id,
+						title: note.title,
+						vector: cachedItem.vector,
+						titleWeight: cachedItem.metadata.titleWeight ?? 0,
+					});
+					cachedCount++;
+					currentNoteIndex++;
+					continue;
+				} else {
+					log(
+						`[${currentNoteIndex + 1}/${notes.length}] cache invalid (contains null/NaN) for "${note.title.slice(0, 30)}"`,
+					);
+				}
 			}
 
 			break;
@@ -152,23 +158,9 @@ export const runTestEmbed = async (installDir: string) => {
 			// ── Clustering Benchmark ─────────────────────────────
 			// Edit this config to compare different algorithms and dimensions.
 			// Results are printed as a comparison table in the console.
-			const clusterConfig: CategorizationConfig = {
-				seed: 42,
-				metric: 'cosine',
-				intermediateDim: 10,
-				intermediateNeighbors: 15,
-				strategies: [
-					{ name: 'kmeans-5', algorithm: 'kmeans', K: 5 },
-					{ name: 'kmedoids-5', algorithm: 'kmedoids', K: 5 },
-					{ name: 'hdbscan-3', algorithm: 'hdbscan', minClusterSize: 3 },
-					{ name: 'hdbscan-3-ms2', algorithm: 'hdbscan', minClusterSize: 3, minSamples: 2 },
-					{ name: 'hdbscan-5-ms2', algorithm: 'hdbscan', minClusterSize: 5, minSamples: 2 },
-				],
-			};
-
 			if (noteVectors.length >= 3) {
 				const vectors = noteVectors.map((nv) => nv.vector);
-				const results = benchmark(vectors, clusterConfig);
+				const results = benchmark(vectors, DEFAULT_CONFIG);
 
 				const notesMap = new Map(notes.map((n) => [n.id, n]));
 				const allPipelineDocuments = noteVectors.map((nv) => {
@@ -191,7 +183,13 @@ export const runTestEmbed = async (installDir: string) => {
 						clusterNotes.get(c)!.push(noteVectors[i].title);
 					}
 					for (const [clusterId, titles] of clusterNotes) {
-						const label = clusterId < 0 ? 'Noise/Outliers' : `Cluster ${clusterId}`;
+						const generatedName = res.clusterNames?.[clusterId];
+						const label =
+							clusterId < 0
+								? 'Noise/Outliers'
+								: generatedName
+									? `${generatedName} (Cluster ${clusterId})`
+									: `Cluster ${clusterId}`;
 						const clusterTags = res.tags?.[clusterId] ? ` [Tags: ${res.tags[clusterId].join(', ')}]` : '';
 						log(`  ${label} (${titles.length} notes)${clusterTags}:`);
 						for (const title of titles) {
diff --git a/src/pipeline/UmapProjector.ts b/src/pipeline/UmapProjector.ts
index 72a4e18..5081db5 100644
--- a/src/pipeline/UmapProjector.ts
+++ b/src/pipeline/UmapProjector.ts
@@ -20,17 +20,40 @@ export class UmapProjector {
 	}
 
 	/**
-	 * Projects high-dimensional vectors to a lower-dimensional space using UMAP.
-	 * @param vectors N vectors of dimension D (N x D)
-	 * @returns N vectors of dimension nComponents
+	 * Projects vectors to a lower-dimensional space using UMAP.
+	 *
+	 * In distance-matrix mode, `vectors` must be index singletons `[[0], [1], ...]`
+	 * because umap-js requires a vectors array to call distanceFn(a, b).
+	 * We encode each point's index as its sole coordinate so the custom distanceFn
+	 * can look up precomputed distances via `distanceMatrix[a[0]][b[0]]`.
 	 */
-	public project(vectors: number[][]): number[][] {
+	public project(vectors: number[][], distanceMatrix?: number[][]): number[][] {
 		if (vectors.length === 0) return [];
 
-		const dim = vectors[0].length;
-		for (let i = 0; i < vectors.length; i++) {
-			if (vectors[i].length !== dim) {
-				throw new Error(`Vector at index ${i} has dimension ${vectors[i].length}, expected ${dim}`);
+		if (distanceMatrix) {
+			const n = vectors.length;
+			if (distanceMatrix.length !== n) {
+				throw new Error(`Distance matrix size (${distanceMatrix.length}) does not match vectors count (${n})`);
+			}
+			for (let i = 0; i < n; i++) {
+				if (vectors[i].length !== 1) {
+					throw new Error(
+						`Vector at index ${i} has dimension ${vectors[i].length}, expected 1 (index singleton)`,
+					);
+				}
+				const idx = vectors[i][0];
+				if (idx < 0 || idx >= n || !Number.isInteger(idx)) {
+					throw new Error(
+						`Vector index at position ${i} is invalid: ${idx}. Must be an integer between 0 and ${n - 1}.`,
+					);
+				}
+			}
+		} else {
+			const dim = vectors[0].length;
+			for (let i = 0; i < vectors.length; i++) {
+				if (vectors[i].length !== dim) {
+					throw new Error(`Vector at index ${i} has dimension ${vectors[i].length}, expected ${dim}`);
+				}
 			}
 		}
 
@@ -46,7 +69,14 @@ export class UmapProjector {
 
 		// nNeighbors must be less than the number of data points
 		const nNeighbors = Math.max(2, Math.min(this.nNeighbors, vectors.length - 1));
-		const distanceFn = this.metric === 'euclidean' ? euclideanDistance : cosineDistance;
+
+		// When using a precomputed distance matrix, vectors are index singletons [i].
+		// The distanceFn extracts indices to look up the precomputed distance.
+		const distanceFn = distanceMatrix
+			? (a: number[], b: number[]) => distanceMatrix[a[0]][b[0]]
+			: this.metric === 'euclidean'
+				? euclideanDistance
+				: cosineDistance;
 
 		const umap = new UMAP({
 			nComponents: this.nComponents,
@@ -57,7 +87,8 @@ export class UmapProjector {
 		});
 
 		log(
-			`UMAP: projecting ${vectors.length} vectors (${dim}D → ${this.nComponents}D), ` +
+			`UMAP: projecting ${vectors.length} vectors ` +
+				`${distanceMatrix ? '(using precomputed distance matrix)' : `(${vectors[0].length}D)`} → ${this.nComponents}D, ` +
 				`neighbors=${nNeighbors}, seed=${this.seed}`,
 		);
 
diff --git a/src/pipeline/clustering/benchmark.ts b/src/pipeline/clustering/benchmark.ts
index 89aeabf..5d87b0e 100644
--- a/src/pipeline/clustering/benchmark.ts
+++ b/src/pipeline/clustering/benchmark.ts
@@ -82,7 +82,11 @@ function logBenchmarkTable(results: BenchmarkResult[]): void {
  * @param config   Categorization config with strategies to benchmark
  * @returns        Benchmark results sorted by silhouette score (descending)
  */
-export function benchmark(vectors: number[][], config: CategorizationConfig): BenchmarkResult[] {
+export function benchmark(
+	vectors: number[][],
+	config: CategorizationConfig,
+	distanceMatrix?: number[][],
+): BenchmarkResult[] {
 	if (vectors.length === 0) {
 		log('No vectors to cluster.');
 		return [];
@@ -92,7 +96,19 @@ export function benchmark(vectors: number[][], config: CategorizationConfig): Be
 
 	// Optionally reduce dimensionality before clustering
 	let clusteringVectors = vectors;
-	if (config.intermediateDim !== null) {
+	if (distanceMatrix) {
+		// Clustering algos need coordinate vectors, not just pairwise distances.
+		// UMAP projects the distance matrix into coordinate space (default 10D).
+		const dim = config.intermediateDim ?? 10;
+		log(`Native mode: projecting distance matrix to ${dim}D coordinates for clustering...`);
+		const projector = new UmapProjector({
+			nComponents: dim,
+			nNeighbors: config.intermediateNeighbors,
+			metric: config.metric,
+			seed: config.seed,
+		});
+		clusteringVectors = projector.project(vectors, distanceMatrix);
+	} else if (config.intermediateDim !== null) {
 		log(`Reducing ${vectors[0].length}D → ${config.intermediateDim}D for clustering...`);
 		const projector = new UmapProjector({
 			nComponents: config.intermediateDim,
diff --git a/src/pipeline/clustering/postProcess.ts b/src/pipeline/clustering/postProcess.ts
index 7c88453..e33060d 100644
--- a/src/pipeline/clustering/postProcess.ts
+++ b/src/pipeline/clustering/postProcess.ts
@@ -145,6 +145,87 @@ export const STOP_WORDS = new Set([
 	'yourself',
 	'yourselves',
 
+	// More prepositions, adverbs, and common noise verbs (to clean up phrases)
+	'without',
+	'within',
+	'throughout',
+	'around',
+	'going',
+	'goes',
+	'went',
+	'getting',
+	'got',
+	'having',
+	'making',
+	'taking',
+	'actually',
+	'really',
+	'basically',
+	'simply',
+	'mainly',
+	'mostly',
+	'highly',
+	'fully',
+	'totally',
+	'completely',
+	'extremely',
+	'very',
+	'quite',
+	'pretty',
+	'somewhat',
+	'rather',
+	'indeed',
+	'always',
+	'never',
+	'sometimes',
+	'often',
+	'usually',
+	'probably',
+	'possibly',
+	'maybe',
+	'crazy',
+	'easy',
+	'hard',
+	'difficult',
+	'simple',
+	'good',
+	'bad',
+	'best',
+	'worst',
+	'better',
+	'worse',
+	'new',
+	'old',
+	'first',
+	'last',
+	'next',
+	'prev',
+	'previous',
+	'current',
+	'different',
+	'same',
+	'other',
+	'another',
+	'each',
+	'every',
+	'many',
+	'much',
+	'few',
+	'several',
+	'some',
+	'any',
+	'no',
+	'work',
+	'thing',
+	'things',
+	'stuff',
+	'name',
+	'value',
+	'data',
+	'user',
+	'item',
+	'items',
+
 	// Markdown/HTML structure words or general noise words (all length >= 3)
 	'http',
 	'https',
@@ -246,6 +327,9 @@ export const STOP_WORDS = new Set([
 /** Words that look like plurals but should not be singularized. */
 const SINGULAR_EXCEPTIONS = new Set(['series', 'species', 'means', 'news', 'analysis', 'basis', 'crisis']);
 
+/** Unigrams with character length at or below this threshold receive a 0.5x scoring penalty. */
+const SHORT_UNIGRAM_THRESHOLD = 4;
+
 /**
  * Strips code blocks, inline code, HTML tags, markdown links/images, and URLs
  * from text to avoid polluting tag extraction.
@@ -361,30 +445,48 @@ export class TfidfExtractor {
 		}
 	}
 
+	/**
+	 * Builds the ngram list for `text` one segment at a time. A segment is the text between
+	 * sentence punctuation, line breaks, markdown header/bullet markers, dashes or brackets,
+	 * so no returned ngram spans two sentences or two lines.
+	 *
+	 * @param text Raw title or body text; an empty string yields an empty list
+	 * @returns Ngrams of every segment in document order, minus phrases with a repeated adjacent word
+	 */
+	private getSegmentNgrams(text: string): string[] {
+		if (!text) return [];
+		const collected: string[] = [];
+		// Any run of these delimiter characters ends the current segment
+		for (const segment of text.split(/[.,?!;:\n\r\-*#()[\]]+/)) {
+			for (const candidate of getNgrams(tokenize(segment))) {
+				// Skip ngrams with consecutive duplicate words (e.g. "day day")
+				if (hasConsecutiveDuplicates(candidate)) continue;
+				collected.push(candidate);
+			}
+		}
+		return collected;
+	}
+
 	/**
 	 * Returns the unique set of words/ngrams in a document (title + body), used for IDF counting.
 	 * No title weighting — each document contributes at most 1 to each ngram's document frequency.
 	 */
 	private getUniqueDocumentWords(doc: DocumentText): Set<string> {
-		const titleWords = tokenize(doc.title || '');
-		const bodyWords = tokenize(doc.body || '');
-		const titleNgrams = getNgrams(titleWords);
-		const bodyNgrams = getNgrams(bodyWords);
+		const titleNgrams = this.getSegmentNgrams(doc.title || '');
+		const bodyNgrams = this.getSegmentNgrams(doc.body || '');
 		return new Set([...titleNgrams, ...bodyNgrams]);
 	}
 
 	/**
-	 * Returns ngrams for TF scoring with title words weighted 3x higher.
+	 * Returns ngrams for TF scoring with title words weighted 5x higher.
 	 * Uses push loops instead of spread to avoid excess intermediate array allocations.
 	 */
 	private getWeightedWords(doc: DocumentText): string[] {
-		const titleWords = tokenize(doc.title || '');
-		const bodyWords = tokenize(doc.body || '');
-		const titleNgrams = getNgrams(titleWords);
-		const bodyNgrams = getNgrams(bodyWords);
+		const titleNgrams = this.getSegmentNgrams(doc.title || '');
+		const bodyNgrams = this.getSegmentNgrams(doc.body || '');
 		const result: string[] = [];
-		// Title ngrams appear 3 times to boost their term frequency
-		for (let i = 0; i < 3; i++) {
+		// Title ngrams appear 5 times to boost their term frequency
+		for (let i = 0; i < 5; i++) {
 			for (const ng of titleNgrams) {
 				result.push(ng);
 			}
@@ -396,61 +498,341 @@ export class TfidfExtractor {
 	}
 
 	/**
-	 * Computes TF-IDF scores for ngrams in the cluster documents and returns the top K.
+	 * Computes sorted TF-IDF scores for ngrams in the cluster documents.
+	 * Incorporates Cluster Frequency (CF) weighting, Length Boosting, and Title Match Boosting.
+	 * Score = tf * idf * cf * lengthBoost * titleBoost, where tf is the title-weighted term
+	 * frequency within the cluster and cf is the fraction of cluster documents containing the ngram.
+	 *
+	 * @param clusterDocuments Notes (title + body) that belong to one cluster
+	 * @returns Ngrams with a positive IDF in `this.idfs`, sorted by descending score; empty if nothing scores
 	 */
-	public extractClusterTags(clusterDocuments: DocumentText[], topK = 5): string[] {
+	public extractClusterNgramsWithScores(clusterDocuments: DocumentText[]): { ngram: string; score: number }[] {
 		if (clusterDocuments.length === 0) return [];
 
-		const tfs: { [word: string]: number } = {};
-		let totalWords = 0;
+		const tfs: { [ngram: string]: number } = {};
+		let totalNgrams = 0;
 
 		for (const doc of clusterDocuments) {
 			const weighted = this.getWeightedWords(doc);
-			for (const w of weighted) {
-				tfs[w] = (tfs[w] || 0) + 1;
-				totalWords++;
+			for (const ng of weighted) {
+				tfs[ng] = (tfs[ng] || 0) + 1;
+				totalNgrams++;
 			}
 		}
 
-		if (totalWords === 0) return [];
+		if (totalNgrams === 0) return [];
+
+		// Count how many documents in the cluster contain each ngram, and collect every ngram
+		// found in a title once, so the per-ngram title boost below is a single set lookup.
+		const docCounts: { [ngram: string]: number } = {};
+		const clusterTitleNgrams = new Set<string>();
+		for (const doc of clusterDocuments) {
+			const titleNgrams = this.getSegmentNgrams(doc.title || '');
+			const bodyNgrams = this.getSegmentNgrams(doc.body || '');
+			for (const ng of titleNgrams) clusterTitleNgrams.add(ng);
+			const docNgrams = new Set([...titleNgrams, ...bodyNgrams]);
+			for (const ng of docNgrams) {
+				docCounts[ng] = (docCounts[ng] || 0) + 1;
+			}
+		}
 
-		const scores: { word: string; score: number }[] = [];
+		const scores: { ngram: string; score: number }[] = [];
 
-		for (const word of Object.keys(tfs)) {
-			const tf = tfs[word] / totalWords;
-			const idf = this.idfs[word] || 0; // default to 0 if word is ignored/generic
+		for (const ngram of Object.keys(tfs)) {
+			const idf = this.idfs[ngram] || 0; // default to 0 if word is ignored/generic
 			if (idf > 0) {
-				scores.push({ word, score: tf * idf });
+				const tf = tfs[ngram] / totalNgrams;
+				const cf = (docCounts[ngram] || 0) / clusterDocuments.length;
+
+				// Length boost: 1.0x for unigram, 1.5x for bigram, 2.0x for trigram
+				const wordCount = ngram.split(' ').length;
+				let lengthBoost = 1.0 + (wordCount - 1) * 0.5;
+
+				// Penalize very short unigrams (length <= 4) to favor longer descriptive phrases
+				if (wordCount === 1 && ngram.length <= SHORT_UNIGRAM_THRESHOLD) {
+					lengthBoost *= 0.5;
+				}
+
+				// Title match boost: 1.5x if it appears in any note title in this cluster
+				const titleBoost = clusterTitleNgrams.has(ngram) ? 1.5 : 1.0;
+
+				const finalScore = tf * idf * cf * lengthBoost * titleBoost;
+				scores.push({ ngram, score: finalScore });
 			}
 		}
 
 		scores.sort((a, b) => b.score - a.score);
+		return scores;
+	}
 
-		const selectedTags: string[] = [];
-		const usedWords = new Set<string>();
+	/**
+	 * Returns up to `topK` deduplicated tags for the cluster, chosen from its scored ngrams.
+	 * @param topK Maximum number of tags to return (default 5)
+	 */
+	public extractClusterTags(clusterDocuments: DocumentText[], topK = 5): string[] {
+		return selectDedupedTags(this.extractClusterNgramsWithScores(clusterDocuments), topK);
+	}
+}
 
-		for (const candidate of scores) {
-			if (selectedTags.length >= topK) break;
+/**
+ * Reports whether any word in `phrase` is immediately followed by the same word.
+ * Comparison is case-insensitive and words are separated by single spaces,
+ * so "Day day trip" is flagged while "day trip day" is not.
+ */
+export function hasConsecutiveDuplicates(phrase: string): boolean {
+	const tokens = phrase.toLowerCase().split(' ');
+	// A token repeats when it equals its predecessor; index 0 has none.
+	return tokens.some((token, idx) => idx > 0 && token === tokens[idx - 1]);
+}
 
-			const constituentWords = candidate.word.split(' ');
-			const allUsed = constituentWords.every((w) => usedWords.has(w));
-			if (!allUsed) {
-				selectedTags.push(candidate.word);
-				for (const w of constituentWords) {
-					usedWords.add(w);
+/**
+ * Drops every single-word candidate that is a constituent word of some multi-word candidate
+ * (bigram/trigram) scoring at least 50% of that unigram's score; all other entries are kept, in input order.
+ */
+export function filterDemotedUnigrams(scores: { ngram: string; score: number }[]): { ngram: string; score: number }[] {
+	return scores.filter((candidate) => {
+		const wordCount = candidate.ngram.split(' ').length;
+		if (wordCount === 1) {
+			const hasStrongerPhrase = scores.some((other) => {
+				const otherWordCount = other.ngram.split(' ').length;
+				if (otherWordCount > 1) {
+					const constituentWords = new Set(other.ngram.toLowerCase().split(' '));
+					if (constituentWords.has(candidate.ngram.toLowerCase()) && other.score >= candidate.score * 0.5) {
+						return true;
+					}
 				}
+				return false;
+			});
+			if (hasStrongerPhrase) {
+				return false;
 			}
 		}
+		return true;
+	});
+}
 
-		return selectedTags;
+/**
+ * Picks at most `topK` tags from ngram candidates while limiting word overlap between tags.
+ *
+ * Candidates first go through `filterDemotedUnigrams` and are then visited in the given order:
+ * - a unigram is accepted only when its word is not part of any accepted tag
+ * - a bigram/trigram is accepted when at most 1 of its words is part of an accepted tag
+ *
+ * @param scores Ngram candidates; visited in array order, so pass them best-first
+ * @param topK   Maximum number of tags to return
+ * @returns      The accepted ngrams in selection order
+ */
+export function selectDedupedTags(scores: { ngram: string; score: number }[], topK: number): string[] {
+	const picked: string[] = [];
+	const seenWords = new Set<string>();
+
+	for (const { ngram } of filterDemotedUnigrams(scores)) {
+		if (picked.length >= topK) break;
+
+		const parts = ngram.split(' ');
+		// Single words tolerate no overlap; longer phrases may reuse one word
+		const allowedOverlap = parts.length === 1 ? 0 : 1;
+		const overlap = parts.filter((part) => seenWords.has(part)).length;
+		if (overlap > allowedOverlap) continue;
+
+		picked.push(ngram);
+		for (const part of parts) {
+			seenWords.add(part);
+		}
+	}
+
+	return picked;
+}
+
+const ACRONYMS = new Set(['sip', 'api', 'ui', 'url', 'html', 'css', 'js', 'db', 'sql', 'onnx']);
+
+/**
+ * Converts a space-separated phrase to Title Case.
+ * Words listed in `ACRONYMS` (matched case-insensitively) are fully upper-cased; every other
+ * word only gets its first character upper-cased and keeps the rest unchanged.
+ *
+ * @returns The re-cased phrase, joined with single spaces
+ */
+export function toTitleCase(phrase: string): string {
+	const cased: string[] = [];
+	for (const word of phrase.split(' ')) {
+		const isAcronym = ACRONYMS.has(word.toLowerCase());
+		cased.push(isAcronym ? word.toUpperCase() : word.charAt(0).toUpperCase() + word.slice(1));
+	}
+	return cased.join(' ');
+}
+
+/** Tells whether `phraseA` and `phraseB` have at least one space-separated word in common, ignoring case. */
+export function shareWords(phraseA: string, phraseB: string): boolean {
+	const known = new Set(phraseA.toLowerCase().split(' '));
+	for (const word of phraseB.toLowerCase().split(' ')) {
+		if (known.has(word)) return true;
+	}
+	return false;
+}
+
+const TAXONOMY_MAPPING: { keywords: string[]; category: string }[] = [ // order matters: getTaxonomyCategory returns the first entry with a matching keyword
+	{
+		keywords: ['travel', 'flight', 'trip', 'train', 'vacation', 'backpacking', 'itinerary', 'packing', 'flights'],
+		category: 'Travel',
+	},
+	{
+		keywords: [
+			'fund',
+			'stock',
+			'invest',
+			'portfolio',
+			'finance',
+			'saving',
+			'tax',
+			'sip',
+			'lump',
+			'stocks',
+			'funds',
+			'investment',
+			'investments',
+		],
+		category: 'Investment',
+	},
+	{
+		keywords: ['prep', 'smoothie', 'protein', 'macro', 'macros', 'diet', 'nutrition', 'meal'],
+		category: 'Meal Prep',
+	},
+	{
+		keywords: [
+			'recipe',
+			'recipes',
+			'starter',
+			'sourdough',
+			'flour',
+			'baking',
+			'bread',
+			'banana',
+			'pasta',
+			'skillet',
+			'cook',
+			'cooking',
+			'kitchen',
+		],
+		category: 'Recipes',
+	},
+	{
+		keywords: [
+			'workout',
+			'overload',
+			'stretch',
+			'stretching',
+			'routine',
+			'pain',
+			'fitness',
+			'exercise',
+			'gym',
+			'cardio',
+			'back', // NOTE(review): matched as a whole word, so any top ngram containing "back" maps to Workout — confirm intended
+			'sitting',
+		],
+		category: 'Workout',
+	},
+	{
+		keywords: [
+			'code',
+			'program',
+			'javascript',
+			'typescript',
+			'node',
+			'git',
+			'docker',
+			'graphql',
+			'rest', // NOTE(review): also an everyday word (e.g. "rest day"); such ngrams map to Programming — confirm intended
+			'api',
+			'jest',
+			'test',
+			'error',
+			'request',
+			'programming',
+			'software',
+			'developer',
+		],
+		category: 'Programming',
+	},
+	{
+		keywords: [
+			'psychology',
+			'money',
+			'meaning',
+			'philosophy',
+			'ravikant',
+			'almanack',
+			'book',
+			'quotes',
+			'thoughts',
+			'reading',
+			'naval',
+		],
+		category: 'Books & Philosophy',
+	},
+];
+
+/**
+ * Looks for a static-taxonomy match among the first 3 ngrams of `scores`.
+ * Ngrams are tried in order and mappings in declaration order; the first match wins.
+ * @returns The matched category name, or null when none of the 3 ngrams contains a keyword
+ */
+export function getTaxonomyCategory(scores: { ngram: string; score: number }[]): string | null {
+	const topPhrases = scores.slice(0, 3).map((entry) => entry.ngram.toLowerCase());
+
+	for (const phrase of topPhrases) {
+		const tokens = phrase.split(' ');
+		// A keyword matches when it is one of the phrase's words or equals the whole phrase
+		const hit = TAXONOMY_MAPPING.find((mapping) =>
+			mapping.keywords.some((keyword) => tokens.includes(keyword) || phrase === keyword),
+		);
+		if (hit) return hit.category;
 	}
+	return null;
 }
 
 /**
- * Enriches benchmark results with extracted TF-IDF tags for each cluster.
+ * Derives a display name for a cluster from its scored ngrams.
+ * Resolution order, after demoted unigrams are filtered out:
+ * 1. no candidate with a positive score -> 'General' (even clusterId) or 'Miscellaneous' (otherwise)
+ * 2. a taxonomy category matched by the leading candidates
+ * 3. "<Top> & <Runner-up>" when a word-disjoint runner-up scores >= 60% of the top candidate
+ * 4. the top candidate alone, in Title Case
+ */
+export function generateClusterName(scores: { ngram: string; score: number }[], clusterId: number): string {
+	const ranked = filterDemotedUnigrams(scores);
+	const best = ranked[0];
+	if (!best || best.score <= 0) {
+		return clusterId % 2 === 0 ? 'General' : 'Miscellaneous';
+	}
+
+	// A high-level taxonomy match takes precedence over generated phrases
+	const category = getTaxonomyCategory(ranked);
+	if (category) return category;
+
+	// First later candidate that shares no word with the best one; stop at non-positive scores
+	let runnerUp: { ngram: string; score: number } | undefined;
+	for (const entry of ranked.slice(1)) {
+		if (entry.score <= 0) break;
+		if (shareWords(best.ngram, entry.ngram)) continue;
+		runnerUp = entry;
+		break;
+	}
+
+	const bestLabel = toTitleCase(best.ngram);
+	// Combine both phrases only when the runner-up is nearly as strong (>= 60% of the best score)
+	if (runnerUp && runnerUp.score >= best.score * 0.6) {
+		return `${bestLabel} & ${toTitleCase(runnerUp.ngram)}`;
+	}
+	return bestLabel;
+}
+
+/**
+ * Enriches benchmark results with extracted TF-IDF tags and cluster names for each cluster.
  *
  * Builds the TF-IDF corpus from all pipeline documents once, then iterates
- * over each strategy result to extract the top tags per cluster.
+ * over each strategy result to extract the top tags and generated names per cluster.
  *
  * @param results    Benchmark results from the clustering pipeline
  * @param documents  All note documents used in the pipeline (same order as noteVectors)
@@ -461,6 +843,7 @@ export function enrichResultsWithTags(results: BenchmarkResult[], documents: Doc
 
 	for (const result of results) {
 		const tags: { [clusterId: number]: string[] } = {};
+		const clusterNames: { [clusterId: number]: string } = {};
 
 		const clusterIndices: { [clusterId: number]: number[] } = {};
 		result.assignments.forEach((clusterId, noteIdx) => {
@@ -472,14 +855,54 @@ export function enrichResultsWithTags(results: BenchmarkResult[], documents: Doc
 			}
 		});
 
+		// Cache ngram scores to avoid recomputation during collision resolution
+		const cachedScores: { [clusterId: number]: { ngram: string; score: number }[] } = {};
+
 		for (const clusterIdStr of Object.keys(clusterIndices)) {
 			const clusterId = Number(clusterIdStr);
 			const indices = clusterIndices[clusterId];
 
 			const clusterDocuments = indices.map((idx) => documents[idx]);
-			tags[clusterId] = tfidfExtractor.extractClusterTags(clusterDocuments, topK);
+			const ngramScores = tfidfExtractor.extractClusterNgramsWithScores(clusterDocuments);
+			cachedScores[clusterId] = ngramScores;
+
+			tags[clusterId] = selectDedupedTags(ngramScores, topK);
+			clusterNames[clusterId] = generateClusterName(ngramScores, clusterId);
+		}
+
+		// Count occurrences of each mapped name to identify collisions (e.g. multiple "Recipes" sections)
+		const nameCounts: { [name: string]: number } = {};
+		for (const idStr of Object.keys(clusterNames)) {
+			const name = clusterNames[Number(idStr)];
+			nameCounts[name] = (nameCounts[name] || 0) + 1;
+		}
+
+		// Resolve duplicates by appending the cluster's top-scoring candidate keyword in parentheses
+		const usedNames = new Set<string>(
+			Object.values(clusterNames).filter((name) => nameCounts[name] === 1),
+		);
+
+		for (const idStr of Object.keys(clusterNames)) {
+			const id = Number(idStr);
+			const name = clusterNames[id];
+			if (nameCounts[name] > 1) {
+				const filteredScores = filterDemotedUnigrams(cachedScores[id]);
+				if (filteredScores.length > 0 && filteredScores[0].score > 0) {
+					const subTopic = toTitleCase(filteredScores[0].ngram);
+					let resolved = `${name} (${subTopic})`;
+					// Guard against re-collision: append numeric suffix if still duplicate
+					if (usedNames.has(resolved)) {
+						let suffix = 2;
+						while (usedNames.has(`${resolved} ${suffix}`)) suffix++;
+						resolved = `${resolved} ${suffix}`;
+					}
+					clusterNames[id] = resolved;
+					usedNames.add(resolved);
+				}
+			}
 		}
 
 		result.tags = tags;
+		result.clusterNames = clusterNames;
 	}
 }
diff --git a/src/pipeline/nativeEmbeddingPipeline.ts b/src/pipeline/nativeEmbeddingPipeline.ts
new file mode 100644
index 0000000..761d9e4
--- /dev/null
+++ b/src/pipeline/nativeEmbeddingPipeline.ts
@@ -0,0 +1,71 @@
+import joplin from 'api';
+import { log } from '../utils/logger';
+
+export interface NativeEmbeddingChunk {
+	noteId: string; // id of the note this chunk belongs to
+	chunkIndex: number; // NOTE(review): presumably the chunk's position within its note — confirm against the Joplin API
+	chunkText: string; // NOTE(review): presumably the text that was embedded — confirm against the Joplin API
+	vector: number[]; // embedding vector of the chunk
+}
+
+/**
+ * Checks if Joplin's native AI indexing is ready; any failure of the status call is logged and reported as not ready.
+ */
+export const isNativeAiReady = async (): Promise<boolean> => {
+	try {
+		const status = await (joplin as any).ai.getIndexStatus();
+		const ready = !!(status && status.ready);
+		log(`Native AI check - state: ${status?.state}, ready: ${ready}, modelId: ${status?.modelId}`);
+		return ready;
+	} catch (err: unknown) {
+		log('Native AI check failed:', err instanceof Error ? err.message : String(err));
+		return false;
+	}
+};
+
+/**
+ * Fetches embedding chunks for `noteIds` from Joplin's native index, 500 note ids per batch, following `nextCursor` until each batch is exhausted; throws on a malformed page, a model id that differs between pages, or a repeated cursor.
+ */
+export const fetchNativeEmbeddings = async (noteIds: string[]): Promise<NativeEmbeddingChunk[]> => {
+	if (noteIds.length === 0) return [];
+
+	log(`Fetching native embeddings for ${noteIds.length} notes...`);
+	const chunks: NativeEmbeddingChunk[] = [];
+	const BATCH_SIZE = 500; // note ids sent per getEmbeddings request
+	let modelId: string | null = null;
+
+	for (let i = 0; i < noteIds.length; i += BATCH_SIZE) {
+		const batchIds = noteIds.slice(i, i + BATCH_SIZE);
+		let cursor: string | undefined;
+		const seenCursors = new Set<string>(); // cursors already returned for this batch
+
+		do {
+			const page = await (joplin as any).ai.getEmbeddings({
+				noteIds: batchIds,
+				cursor,
+				limit: 1000,
+			});
+
+			if (!page || !Array.isArray(page.chunks)) {
+				throw new Error('Invalid response from Joplin native getEmbeddings API');
+			}
+
+			if (modelId && page.modelId !== modelId) { // every page must report the model id seen on earlier pages
+				throw new Error('Embedding model changed mid-fetch. Please restart.');
+			}
+			modelId = page.modelId;
+			chunks.push(...page.chunks);
+			cursor = page.nextCursor;
+
+			if (cursor) {
+				if (seenCursors.has(cursor)) {
+					throw new Error('Detected duplicate cursor in pagination, aborting to prevent infinite loop.');
+				}
+				seenCursors.add(cursor);
+			}
+		} while (cursor);
+	}
+
+	log(`Successfully fetched ${chunks.length} embedding chunks`);
+	return chunks;
+};
diff --git a/src/pipeline/pipelineConfig.ts b/src/pipeline/pipelineConfig.ts
new file mode 100644
index 0000000..cd67e56
--- /dev/null
+++ b/src/pipeline/pipelineConfig.ts
@@ -0,0 +1,23 @@
+import { CategorizationConfig } from '../types/cluster';
+
+/** Dimensionality of embedding vectors (all-MiniLM-L6-v2 / multilingual-e5-small). */
+export const EMBEDDING_DIM = 384;
+
+/** True only when `vector` has EMBEDDING_DIM elements and each is a finite number (no null, undefined, NaN or ±Infinity). */
+export function isValidEmbeddingVector(vector: number[] | undefined | null): boolean {
+	if (!vector || vector.length !== EMBEDDING_DIM) return false;
+	return vector.every((v) => Number.isFinite(v));
+}
+
+export const DEFAULT_CONFIG: CategorizationConfig = { // default clustering config imported by runPipeline.ts and testEmbed.ts
+	seed: 42,
+	metric: 'cosine',
+	intermediateDim: 8, // UMAP output dimensionality that benchmark() reduces to before clustering
+	intermediateNeighbors: 5, // handed to UmapProjector as nNeighbors
+	strategies: [
+		{ name: 'kmeans-6', algorithm: 'kmeans', K: 6 },
+		{ name: 'kmedoids-6', algorithm: 'kmedoids', K: 6 },
+		{ name: 'hdbscan-tuned', algorithm: 'hdbscan', minClusterSize: 4, minSamples: 1 },
+		{ name: 'hdbscan-conservative', algorithm: 'hdbscan', minClusterSize: 3, minSamples: 2 },
+	],
+};
diff --git a/src/pipeline/runPipeline.ts b/src/pipeline/runPipeline.ts
index dd964f8..62bc63b 100644
--- a/src/pipeline/runPipeline.ts
+++ b/src/pipeline/runPipeline.ts
@@ -1,6 +1,5 @@
 import { fetchAllNotes } from './noteReader';
 import { benchmark } from './clustering/benchmark';
-import { CategorizationConfig } from '../types/cluster';
 import { averageVectors, blendVectors, computeTitleWeight, cosineSimilarity } from './vectorAggregator';
 import { NoteVector, WorkerMessage } from '../types/embed';
 import { PanelNote } from '../types/panel';
@@ -9,25 +8,13 @@ import { log, logErr } from '../utils/logger';
 import { getEncoding } from 'js-tiktoken';
 import { VectorCache } from './vectorCache';
 import { enrichResultsWithTags } from './clustering/postProcess';
+import { isNativeAiReady, fetchNativeEmbeddings } from './nativeEmbeddingPipeline';
+import { DEFAULT_CONFIG, isValidEmbeddingVector } from './pipelineConfig';
 
 // See testEmbed.ts for rationale on cl100k_base and the 200-token limit.
 const enc = getEncoding('cl100k_base');
 const MAX_TOKENS = 200;
 
-const DEFAULT_CONFIG: CategorizationConfig = {
-	seed: 42,
-	metric: 'cosine',
-	intermediateDim: 10,
-	intermediateNeighbors: 15,
-	strategies: [
-		{ name: 'kmeans-5', algorithm: 'kmeans', K: 5 },
-		{ name: 'kmedoids-5', algorithm: 'kmedoids', K: 5 },
-		{ name: 'hdbscan-3', algorithm: 'hdbscan', minClusterSize: 3 },
-		{ name: 'hdbscan-3-ms2', algorithm: 'hdbscan', minClusterSize: 3, minSamples: 2 },
-		{ name: 'hdbscan-5-ms2', algorithm: 'hdbscan', minClusterSize: 5, minSamples: 2 },
-	],
-};
-
 export interface PipelineCallbacks {
 	onStatus: (text: string) => void;
 	onProgress: (current: number, total: number, cached: number, skipped: number) => void;
@@ -52,6 +39,76 @@ export const runPipeline = async (installDir: string, callbacks: PipelineCallbac
 			return;
 		}
 
+		if (notes.length < 3) {
+			callbacks.onError('Too few notes for clustering (need at least 3).');
+			return;
+		}
+
+		if (await isNativeAiReady()) {
+			log('Native AI Search active: using native embeddings pipeline');
+			callbacks.onStatus('Fetching native embeddings...');
+
+			try {
+				const noteIds = notes.map((n) => n.id);
+				const chunks = await fetchNativeEmbeddings(noteIds);
+
+				// Group chunks by noteId
+				const noteChunksMap = new Map<string, number[][]>();
+				for (const chunk of chunks) {
+					const list = noteChunksMap.get(chunk.noteId) || [];
+					list.push(chunk.vector);
+					noteChunksMap.set(chunk.noteId, list);
+				}
+
+				const validNotes: typeof notes = [];
+				const vectors: number[][] = [];
+
+				for (const note of notes) {
+					const chunkVectors = noteChunksMap.get(note.id);
+					if (chunkVectors && chunkVectors.length > 0) {
+						const avgVector = averageVectors(chunkVectors);
+
+						if (isValidEmbeddingVector(avgVector)) {
+							vectors.push(avgVector);
+							validNotes.push(note);
+						} else {
+							logErr(
+								`Native embedding for note "${note.title}" contains NaN/null or wrong dimension. Skipping.`,
+							);
+						}
+					}
+				}
+
+				log(`Grouped embeddings: found ${vectors.length} notes with valid embeddings.`);
+
+				if (validNotes.length < 3) {
+					log('Too few indexed notes found in native DB. Falling back to local ONNX Web Worker.');
+				} else {
+					callbacks.onStatus('Clustering...');
+					const results = benchmark(vectors, DEFAULT_CONFIG);
+
+					const allPipelineDocuments = validNotes.map((n) => ({
+						title: n.title,
+						body: n.body,
+					}));
+
+					enrichResultsWithTags(results, allPipelineDocuments);
+
+					const panelNotes: PanelNote[] = validNotes.map((n) => ({
+						noteId: n.id,
+						title: n.title,
+					}));
+
+					callbacks.onComplete(results, panelNotes);
+					return;
+				}
+			} catch (err: any) {
+				logErr('Failed to run native embeddings pipeline:', err.message);
+			}
+		}
+
+		log('Native AI Search unavailable: falling back to local ONNX Web Worker');
+
 		const cache = await VectorCache.create();
 
 		// Remove notes from cache that are no longer in Joplin
@@ -142,17 +199,23 @@ export const runPipeline = async (installDir: string, callbacks: PipelineCallbac
 				const cachedItem = await cache.getItem(note.id);
 
 				if (cachedItem && cachedItem.metadata.hash === currentNoteHash) {
-					log(`[${currentNoteIndex + 1}/${notes.length}] cache hit for "${note.title.slice(0, 30)}"`);
-					noteVectors.push({
-						noteId: note.id,
-						title: note.title,
-						vector: cachedItem.vector,
-						titleWeight: cachedItem.metadata.titleWeight ?? 0,
-					});
-					cachedCount++;
-					currentNoteIndex++;
-					reportProgress();
-					continue;
+					if (isValidEmbeddingVector(cachedItem.vector)) {
+						log(`[${currentNoteIndex + 1}/${notes.length}] cache hit for "${note.title.slice(0, 30)}"`);
+						noteVectors.push({
+							noteId: note.id,
+							title: note.title,
+							vector: cachedItem.vector,
+							titleWeight: cachedItem.metadata.titleWeight ?? 0,
+						});
+						cachedCount++;
+						currentNoteIndex++;
+						reportProgress();
+						continue;
+					} else {
+						log(
+							`[${currentNoteIndex + 1}/${notes.length}] cache invalid (contains null/NaN) for "${note.title.slice(0, 30)}"`,
+						);
+					}
 				}
 
 				break;
diff --git a/src/types/cluster.ts b/src/types/cluster.ts
index 6365583..a2b9271 100644
--- a/src/types/cluster.ts
+++ b/src/types/cluster.ts
@@ -43,4 +43,6 @@ export interface BenchmarkResult {
 	timeMs: number;
 	/** Extracted tags for each cluster, keyed by cluster ID. Outliers (-1) are excluded. */
 	tags?: { [clusterId: number]: string[] };
+	/** Generated name for each cluster, keyed by cluster ID. Outliers (-1) are excluded. */
+	clusterNames?: { [clusterId: number]: string };
 }
diff --git a/src/webview/components/ClusterCard.tsx b/src/webview/components/ClusterCard.tsx
index 334c0cf..03b06c7 100644
--- a/src/webview/components/ClusterCard.tsx
+++ b/src/webview/components/ClusterCard.tsx
@@ -7,15 +7,49 @@ interface ClusterCardProps {
 	notes: PanelNote[];
 	isNoise?: boolean;
 	tags?: string[];
+	onRename?: (newName: string) => void;
 }
 
-export const ClusterCard: React.FC<ClusterCardProps> = ({ title, noteIndices, notes, isNoise, tags }) => {
+export const ClusterCard: React.FC<ClusterCardProps> = ({ title, noteIndices, notes, isNoise, tags, onRename }) => {
 	const [isExpanded, setIsExpanded] = React.useState(false);
+	const [isEditing, setIsEditing] = React.useState(false);
+	const [editValue, setEditValue] = React.useState(title);
+
+	React.useEffect(() => {
+		setEditValue(title);
+	}, [title]);
 
 	const handleHeaderClick = () => {
 		setIsExpanded((prev) => !prev);
 	};
 
+	const handleEditClick = (e: React.MouseEvent) => {
+		e.stopPropagation(); // the button sits inside .cluster-header, whose onClick toggles expansion
+		setIsEditing(true); // swaps the title for the rename input
+	};
+
+	/** Commits a non-empty, changed draft through onRename, then leaves edit mode. */
+	const handleSave = (e?: React.FormEvent | React.FocusEvent) => {
+		e?.stopPropagation();
+		e?.preventDefault();
+		const trimmed = editValue.trim();
+		if (trimmed && trimmed !== title && onRename) onRename(trimmed);
+		// Reset the draft: an empty or unchanged value would otherwise reappear on the next edit,
+		// because the title effect only re-syncs editValue when the title prop changes.
+		setEditValue(title);
+		setIsEditing(false);
+	};
+
+	/** Rename-input keys: Enter commits the draft, Escape discards it and closes the editor. */
+	const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
+		e.stopPropagation();
+		if (e.key === 'Enter') handleSave();
+		if (e.key !== 'Escape') return;
+		// Escape: discard the draft and close the editor without calling onRename.
+		setEditValue(title);
+		setIsEditing(false);
+	};
+
 	const handleNoteClick = (noteId: string) => {
 		webviewApi.postMessage({ type: 'openNote', noteId });
 	};
@@ -27,7 +61,39 @@ export const ClusterCard: React.FC<ClusterCardProps> = ({ title, noteIndices, no
 		<div className={`cluster-card${isNoise ? ' noise' : ''}${isExpanded ? ' expanded' : ''}`}>
 			<div className="cluster-header" onClick={handleHeaderClick}>
 				<div className="cluster-header-left">
-					<span className="cluster-title">{title}</span>
+					{isEditing ? (
+						<input
+							type="text"
+							className="cluster-title-input"
+							value={editValue}
+							onChange={(e) => setEditValue(e.target.value)}
+							onBlur={handleSave}
+							onKeyDown={handleKeyDown}
+							onClick={(e) => e.stopPropagation()}
+							autoFocus
+						/>
+					) : (
+						<div className="cluster-title-container">
+							<span className="cluster-title">{title}</span>
+							{!isNoise && onRename && (
+								<button className="cluster-edit-btn" onClick={handleEditClick} title="Rename category">
+									<svg
+										width="11"
+										height="11"
+										viewBox="0 0 24 24"
+										fill="none"
+										stroke="currentColor"
+										strokeWidth="2.0"
+										strokeLinecap="round"
+										strokeLinejoin="round"
+									>
+										<path d="M12 20h9" />
+										<path d="M16.5 3.5a2.12 2.12 0 0 1 3 3L7 19l-4 1 1-4Z" />
+									</svg>
+								</button>
+							)}
+						</div>
+					)}
 					{tags && tags.length > 0 && (
 						<div className="cluster-tags">
 							{tags.map((tag, idx) => (
diff --git a/src/webview/context/AppStateContext.tsx b/src/webview/context/AppStateContext.tsx
index 220b326..696bbdf 100644
--- a/src/webview/context/AppStateContext.tsx
+++ b/src/webview/context/AppStateContext.tsx
@@ -17,6 +17,7 @@ interface AppStateContextType {
 	runPipeline: () => void;
 	changeStrategy: (index: number) => void;
 	setView: (view: ViewType) => void;
+	updateClusterName: (clusterId: number, newName: string) => void;
 }
 
 const AppStateContext = React.createContext<AppStateContextType | undefined>(undefined);
@@ -125,6 +126,20 @@ export const AppStateProvider: React.FC<{ children: React.ReactNode }> = ({ chil
 		setActiveView(view);
 	};
 
+	/**
+	 * Records a user-chosen name for `clusterId` on the currently selected strategy.
+	 * The strategies array and the touched entry are copied, never mutated in place.
+	 */
+	const updateClusterName = (clusterId: number, newName: string) => {
+		setStrategies((prev) =>
+			prev.map((strategy, index) =>
+				index === selectedStrategyIndex
+					? { ...strategy, clusterNames: { ...strategy.clusterNames, [clusterId]: newName } }
+					: strategy,
+			),
+		);
+	};
+
 	return (
 		<AppStateContext.Provider
 			value={{
@@ -139,6 +154,7 @@ export const AppStateProvider: React.FC<{ children: React.ReactNode }> = ({ chil
 				runPipeline,
 				changeStrategy,
 				setView,
+				updateClusterName,
 			}}
 		>
 			{children}
diff --git a/src/webview/pages/DashboardPage.tsx b/src/webview/pages/DashboardPage.tsx
index 5dc3e35..0c008d0 100644
--- a/src/webview/pages/DashboardPage.tsx
+++ b/src/webview/pages/DashboardPage.tsx
@@ -5,7 +5,8 @@ import { StrategySection } from '../components/StrategySection';
 import { ClusterCard } from '../components/ClusterCard';
 
 export const DashboardPage: React.FC = () => {
-	const { isRunning, runPipeline, strategies, selectedStrategyIndex, changeStrategy, notes } = useAppState();
+	const { isRunning, runPipeline, strategies, selectedStrategyIndex, changeStrategy, notes, updateClusterName } =
+		useAppState();
 
 	const selectedStrategy = strategies[selectedStrategyIndex];
 
@@ -43,10 +44,11 @@ export const DashboardPage: React.FC = () => {
 				{sortedClusterIds.map((id, idx) => (
 					<ClusterCard
 						key={id}
-						title={`Cluster ${idx + 1}`}
+						title={selectedStrategy.clusterNames?.[id] || `Cluster ${idx + 1}`}
 						noteIndices={clusters[id]}
 						notes={notes}
 						tags={selectedStrategy.tags?.[id]}
+						onRename={(newName) => updateClusterName(id, newName)}
 					/>
 				))}
 				{noise.length > 0 && (
diff --git a/src/webview/panel.css b/src/webview/panel.css
index 18b1020..0b7862c 100644
--- a/src/webview/panel.css
+++ b/src/webview/panel.css
@@ -443,3 +443,46 @@ body {
 .config-card-item {
 	margin: 4px 0;
 }
+
+/* --- Cluster Title Editing --- */
+
+.cluster-title-container {
+	display: inline-flex;
+	align-items: center;
+	gap: 6px;
+	max-width: 100%;
+}
+
+.cluster-edit-btn {
+	background: transparent;
+	border: none;
+	padding: 2px 4px;
+	border-radius: 4px;
+	cursor: pointer;
+	color: var(--joplin-color);
+	opacity: 0.4;
+	display: inline-flex;
+	align-items: center;
+	justify-content: center;
+	transition: opacity 0.15s, background-color 0.15s;
+}
+
+.cluster-edit-btn:hover {
+	opacity: 0.9;
+	background-color: var(--joplin-divider-color);
+}
+
+.cluster-title-input {
+	font-size: 0.9em;
+	font-weight: 600;
+	font-family: inherit;
+	padding: 2px 6px;
+	border: 1px solid var(--accent);
+	border-radius: 4px;
+	background: var(--joplin-background-color);
+	color: var(--joplin-color);
+	outline: none;
+	min-width: 120px;
+	max-width: 200px;
+	width: 100%;
+}
diff --git a/src/worker/embedWorker.ts b/src/worker/embedWorker.ts
index 9b5752c..ce0f585 100644
--- a/src/worker/embedWorker.ts
+++ b/src/worker/embedWorker.ts
@@ -14,12 +14,25 @@ const POOLING = 'mean' as const;
 env.backends.onnx.wasm!.wasmPaths = '../onnx-dist/';
 
 let embedder: any = null;
+let selectedDevice: any = 'wasm';
+let selectedDtype: any = 'q8';
+
+/** Switches the worker to the WASM/q8 backend, reloads the model and sanity-checks a warmup run. */
+const loadWasmFallback = async () => {
+	// embed() reads selectedDevice to decide whether a fallback is still available.
+	selectedDevice = 'wasm';
+	selectedDtype = 'q8';
+	embedder = await pipeline('feature-extraction', MODEL_ID, { dtype: selectedDtype, device: selectedDevice });
+	const warmup = await embedder('warmup text', { pooling: POOLING, normalize: true });
+	const sawNaN = Boolean(warmup?.data) && warmup.data.some((v: number) => isNaN(v));
+	if (sawNaN) {
+		throw new Error('WASM fallback warmup returned NaN values');
+	}
+};
 
 const loadModel = async () => {
 	const t0 = performance.now();
 
-	let selectedDevice: any = 'wasm';
-	let selectedDtype: any = 'q8';
 	let workerGpuExists = false;
 	let adapterFound = false;
 
@@ -43,17 +56,13 @@ const loadModel = async () => {
 			dtype: selectedDtype,
 			device: selectedDevice,
 		});
-		await embedder('warmup text', { pooling: POOLING, normalize: true });
+		const warmupResult = await embedder('warmup text', { pooling: POOLING, normalize: true });
+		if (warmupResult && warmupResult.data && warmupResult.data.some((v: number) => isNaN(v))) {
+			throw new Error('Warmup returned NaN values (WebGPU fp16 numeric instability)');
+		}
 	} catch (e) {
 		if (selectedDevice === 'webgpu') {
-			// WebGPU pipeline or warmup failed, retry with WASM/q8
-			selectedDevice = 'wasm';
-			selectedDtype = 'q8';
-			embedder = await pipeline('feature-extraction', MODEL_ID, {
-				dtype: selectedDtype,
-				device: selectedDevice,
-			});
-			await embedder('warmup text', { pooling: POOLING, normalize: true });
+			await loadWasmFallback();
 		} else {
 			throw e;
 		}
@@ -70,16 +79,30 @@ const loadModel = async () => {
 	};
 };
 
-const embed = async (text: string) => {
+const embed = async (text: string): Promise<{ inferenceTime: number; dimensions: number; embedding: number[] }> => {
 	if (!embedder) throw new Error('Model not loaded');
 
-	const t0 = performance.now();
-	const output = await embedder(text, { pooling: POOLING, normalize: true });
-	const inferenceTime = performance.now() - t0;
-	const dimensions = output.data.length;
-	const embedding = Array.from(output.data as Float32Array);
+	try { // NaN output is thrown as an error below so both failure modes reach the same catch
+		const t0 = performance.now();
+		const output = await embedder(text, { pooling: POOLING, normalize: true });
+		const inferenceTime = performance.now() - t0;
+		const dimensions = output.data.length;
+		const embedding = Array.from(output.data as Float32Array);
+
+		if (embedding.some((v) => isNaN(v))) {
+			throw new Error('Inference returned NaN values');
+		}
 
-	return { inferenceTime, dimensions, embedding };
+		return { inferenceTime, dimensions, embedding };
+	} catch (e: any) {
+		if (selectedDevice === 'webgpu') { // only the WebGPU path has a fallback; a WASM failure is rethrown
+			console.warn('WebGPU inference failed or returned NaN. Falling back to WASM/q8 dynamically...', e);
+			await loadWasmFallback();
+			return await embed(text); // single retry: loadWasmFallback set selectedDevice to 'wasm' first
+		} else {
+			throw e;
+		}
+	}
 };
 
 self.addEventListener('message', async (event) => {

From c6b34a5c49e085c77a37edd6fc653e31f61737cc Mon Sep 17 00:00:00 2001
From: Harsh16gupta <harsh16official@gmail.com>
Date: Sat, 4 Jul 2026 17:01:08 +0530
Subject: [PATCH 2/2] style: format postProcess.ts with prettier

---
 src/pipeline/clustering/postProcess.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/pipeline/clustering/postProcess.ts b/src/pipeline/clustering/postProcess.ts
index e33060d..15f2845 100644
--- a/src/pipeline/clustering/postProcess.ts
+++ b/src/pipeline/clustering/postProcess.ts
@@ -878,9 +878,7 @@ export function enrichResultsWithTags(results: BenchmarkResult[], documents: Doc
 		}
 
 		// Resolve duplicates by appending the cluster's top-scoring candidate keyword in parentheses
-		const usedNames = new Set<string>(
-			Object.values(clusterNames).filter((name) => nameCounts[name] === 1),
-		);
+		const usedNames = new Set<string>(Object.values(clusterNames).filter((name) => nameCounts[name] === 1));
 
 		for (const idStr of Object.keys(clusterNames)) {
 			const id = Number(idStr);