Add token count display and custom Whisper prompt token limit

Features:
- Added a token count display for the custom Whisper prompt setting
- Improved the styling of the info box for better readability
- Added a token limit for the custom Whisper prompt to enhance transcription accuracy
- Updated the info box to provide guidance on token limits and accuracy
SystemSculpt · Sep 22, 2024 · 0f54fce · 0f54fce
1 parent 75ba14f
commit 0f54fce
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 4 deletions.
diff --git a/manifest.json b/manifest.json
@@ -6,6 +6,6 @@
   "authorUrl": "systemsculpt.com",
   "fundingUrl": "https://www.patreon.com/SystemSculpt",
   "minAppVersion": "1.5.0",
-  "version": "0.6.7",
+  "version": "0.6.8",
   "isDesktopOnly": true
 }
diff --git a/src/css/info-box.css b/src/css/info-box.css
@@ -15,3 +15,15 @@
 .upcoming-features {
   margin-top: 35px;
 }
+
+.info-box-token-count {
+  font-size: 12px;
+  background-color: var(--background-primary);
+  padding: 5px 10px;
+  border-radius: 12px;
+  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+  display: block;
+  text-align: right;
+  margin-bottom: 5px;
+  color: var(--text-muted);
+}
diff --git a/src/modules/recorder/settings/customWhisperPromptSetting.ts b/src/modules/recorder/settings/customWhisperPromptSetting.ts
@@ -1,6 +1,11 @@
 import { Setting } from 'obsidian';
 import { RecorderModule } from '../RecorderModule';
 import { DEFAULT_RECORDER_SETTINGS } from './RecorderSettings';
+import { encode, decode } from 'gpt-tokenizer';
+
+function getWhisperTokenCount(text: string): number {
+  return encode(text).length;
+}
 
 export function renderCustomWhisperPromptSetting(
   containerEl: HTMLElement,
@@ -22,14 +27,19 @@ export function renderCustomWhisperPromptSetting(
   if (plugin.settings.enableCustomWhisperPrompt) {
     new Setting(containerEl)
       .setName('Custom Whisper Prompt')
-      .setDesc('Customize the prompt used for Whisper transcription')
+      .setDesc('Customize the prompt used for Whisper transcription (max 244 tokens)')
       .addTextArea(text => {
         text
           .setPlaceholder('Enter custom prompt')
           .setValue(plugin.settings.customWhisperPrompt)
           .onChange(async value => {
-            plugin.settings.customWhisperPrompt = value;
+            const truncatedValue = truncateToTokenLimit(value, 244);
+            if (truncatedValue !== value) {
+              text.setValue(truncatedValue);
+            }
+            plugin.settings.customWhisperPrompt = truncatedValue;
             await plugin.saveSettings();
+            updateTokenCount(truncatedValue, tokenCountEl);
           });
         text.inputEl.rows = 4;
         text.inputEl.cols = 50;
@@ -44,5 +54,30 @@ export function renderCustomWhisperPromptSetting(
             plugin.settingsDisplay(containerEl);
           });
       });
+
+    const tokenCountEl = containerEl.createDiv({ cls: 'info-box-token-count' });
+    tokenCountEl.style.textAlign = 'right';
+    tokenCountEl.style.marginBottom = '8px';
+
+    updateTokenCount(plugin.settings.customWhisperPrompt, tokenCountEl);
+
+    const infoBoxEl = containerEl.createDiv('info-box');
+    infoBoxEl.createEl('p', {
+      text: 'The custom Whisper prompt can help improve transcription accuracy by correcting specific words or acronyms, preserving context for split audio files, ensuring proper punctuation and filler words, and specifying preferred writing styles for certain languages. Note that Whisper only considers the first 244 tokens of the prompt.',
+    });
+  }
+}
+
+function updateTokenCount(text: string, tokenCountEl: HTMLElement) {
+  const tokenCount = getWhisperTokenCount(text);
+  const tokenCountText = `${tokenCount}/244 tokens used`;
+  tokenCountEl.textContent = tokenCountText;
+}
+
+function truncateToTokenLimit(text: string, limit: number): string {
+  const tokens = encode(text);
+  if (tokens.length <= limit) {
+    return text;
   }
+  return decode(tokens.slice(0, limit));
 }
diff --git a/src/modules/recorder/settings/postProcessingPromptSetting.ts b/src/modules/recorder/settings/postProcessingPromptSetting.ts
@@ -44,5 +44,10 @@ export function renderPostProcessingPromptSetting(
             plugin.settingsDisplay(containerEl);
           });
       });
+
+    const infoBoxEl = containerEl.createDiv('info-box');
+    infoBoxEl.createEl('p', {
+      text: 'The post-processing prompt uses GPT models to improve the transcript by correcting misspellings, improving overall accuracy, and enhancing readability. It allows for more specific instructions and can handle a larger context window for better understanding of the transcription.',
+    });
   }
 }
diff --git a/styles.css b/styles.css
@@ -1726,6 +1726,18 @@
 .upcoming-features {
   margin-top: 35px;
 }
+
+.info-box-token-count {
+  font-size: 12px;
+  background-color: var(--background-primary);
+  padding: 5px 10px;
+  border-radius: 12px;
+  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+  display: block;
+  text-align: right;
+  margin-bottom: 5px;
+  color: var(--text-muted);
+}
 /* Spinner */
 .spinner {
   width: 40px;

diff --git a/versions.json b/versions.json
@@ -1,4 +1,4 @@
 {
   "0.1.0": "1.5.0",
-  "0.6.7": "1.5.0"
+  "0.6.8": "1.5.0"
 }