407 lines
19 KiB
Python
407 lines
19 KiB
Python
import os
|
|
import sys
|
|
import tkinter as tk
|
|
from tkinter import ttk
|
|
from tkinter import filedialog
|
|
from tkinter import messagebox
|
|
from src._LocalTranscribe import transcribe, get_path, detect_backend, _transcribe_worker_process
|
|
import multiprocessing as mp
|
|
import customtkinter
|
|
import threading
|
|
|
|
|
|
# ── Helper: redirect stdout/stderr into a CTkTextbox ──────────────────────
|
|
import re
|
|
_ANSI_RE = re.compile(r'\x1b\[[0-9;]*m') # strip colour codes
|
|
|
|
class _ConsoleRedirector:
|
|
"""Redirects output exclusively to the in-app console panel."""
|
|
def __init__(self, text_widget):
|
|
self.widget = text_widget
|
|
|
|
def write(self, text):
|
|
clean = _ANSI_RE.sub('', text) # strip ANSI colours
|
|
if clean.strip() == '':
|
|
return
|
|
# Schedule UI update on the main thread
|
|
try:
|
|
self.widget.after(0, self._append, clean)
|
|
except Exception:
|
|
pass
|
|
|
|
def _append(self, text):
|
|
self.widget.configure(state='normal')
|
|
self.widget.insert('end', text + ('\n' if not text.endswith('\n') else ''))
|
|
self.widget.see('end')
|
|
self.widget.configure(state='disabled')
|
|
|
|
def flush(self):
|
|
pass
|
|
|
|
# HuggingFace model IDs for non-standard models
|
|
# Display label -> HuggingFace repository id.  Every entry follows the same
# naming scheme, so the table is derived from the list of model sizes.
HF_MODEL_MAP = {
    f'KB Swedish ({size})': f'KBLab/kb-whisper-{size}'
    for size in ('tiny', 'base', 'small', 'medium', 'large')
}
|
|
|
|
# Per-model info shown in the UI description label
|
|
# (speed, size, quality stars, suggested use)
|
|
# Built from one row per model: (name, speed, size, quality stars, suggested use).
# The dictionary value is the row without its leading name.
MODEL_INFO = {
    name: (speed, size, stars, use)
    for name, speed, size, stars, use in (
        ('tiny', 'Very fast', '~75 MB', '★★☆☆☆', 'Quick drafts & testing'),
        ('tiny.en', 'Very fast', '~75 MB', '★★☆☆☆', 'Quick drafts & testing (English only)'),
        ('base', 'Fast', '~145 MB', '★★★☆☆', 'Notes & short podcasts'),
        ('base.en', 'Fast', '~145 MB', '★★★☆☆', 'Notes & short podcasts (English only)'),
        ('small', 'Balanced', '~485 MB', '★★★★☆', 'Everyday use'),
        ('small.en', 'Balanced', '~485 MB', '★★★★☆', 'Everyday use (English only)'),
        ('medium', 'Accurate', '~1.5 GB', '★★★★☆', 'Professional content'),
        ('medium.en', 'Accurate', '~1.5 GB', '★★★★☆', 'Professional content (English only)'),
        ('large-v2', 'Slow', '~3 GB', '★★★★★', 'Maximum accuracy'),
        ('large-v3', 'Slow', '~3 GB', '★★★★★', 'Maximum accuracy (recommended)'),
        ('KB Swedish (tiny)', 'Very fast', '~75 MB', '★★★☆☆', 'Swedish — optimised by KBLab'),
        ('KB Swedish (base)', 'Fast', '~145 MB', '★★★☆☆', 'Swedish — optimised by KBLab'),
        ('KB Swedish (small)', 'Balanced', '~485 MB', '★★★★☆', 'Swedish — optimised by KBLab'),
        ('KB Swedish (medium)', 'Accurate', '~1.5 GB', '★★★★☆', 'Swedish — optimised by KBLab'),
        ('KB Swedish (large)', 'Slow', '~3 GB', '★★★★★', 'Swedish — KBLab, best accuracy'),
    )
}
|
|
|
|
|
|
|
|
# Module-level side effect: select customtkinter's "System" appearance mode
# for the whole application as soon as this file is imported or run.
customtkinter.set_appearance_mode("System")
|
|
# Module-level side effect: choose the colour theme used by all customtkinter
# widgets created afterwards.
customtkinter.set_default_color_theme("blue") # Themes: blue (default), dark-blue, green
|
|
|
|
# All languages supported by Whisper (display label → ISO code; None = auto-detect)
|
|
# Every label has the form "<Name> (<code>)", so the mapping is generated from
# (name, code) pairs; 'Auto-detect' is the only entry without a code.
WHISPER_LANGUAGES = {
    'Auto-detect': None,
    **{
        f'{name} ({code})': code
        for name, code in (
            ('Afrikaans', 'af'), ('Albanian', 'sq'),
            ('Amharic', 'am'), ('Arabic', 'ar'),
            ('Armenian', 'hy'), ('Assamese', 'as'),
            ('Azerbaijani', 'az'), ('Bashkir', 'ba'),
            ('Basque', 'eu'), ('Belarusian', 'be'),
            ('Bengali', 'bn'), ('Bosnian', 'bs'),
            ('Breton', 'br'), ('Bulgarian', 'bg'),
            ('Catalan', 'ca'), ('Chinese', 'zh'),
            ('Croatian', 'hr'), ('Czech', 'cs'),
            ('Danish', 'da'), ('Dutch', 'nl'),
            ('English', 'en'), ('Estonian', 'et'),
            ('Faroese', 'fo'), ('Finnish', 'fi'),
            ('French', 'fr'), ('Galician', 'gl'),
            ('Georgian', 'ka'), ('German', 'de'),
            ('Greek', 'el'), ('Gujarati', 'gu'),
            ('Haitian Creole', 'ht'), ('Hausa', 'ha'),
            ('Hawaiian', 'haw'), ('Hebrew', 'he'),
            ('Hindi', 'hi'), ('Hungarian', 'hu'),
            ('Icelandic', 'is'), ('Indonesian', 'id'),
            ('Italian', 'it'), ('Japanese', 'ja'),
            ('Javanese', 'jw'), ('Kannada', 'kn'),
            ('Kazakh', 'kk'), ('Khmer', 'km'),
            ('Korean', 'ko'), ('Lao', 'lo'),
            ('Latin', 'la'), ('Latvian', 'lv'),
            ('Lingala', 'ln'), ('Lithuanian', 'lt'),
            ('Luxembourgish', 'lb'), ('Macedonian', 'mk'),
            ('Malagasy', 'mg'), ('Malay', 'ms'),
            ('Malayalam', 'ml'), ('Maltese', 'mt'),
            ('Maori', 'mi'), ('Marathi', 'mr'),
            ('Mongolian', 'mn'), ('Myanmar', 'my'),
            ('Nepali', 'ne'), ('Norwegian', 'no'),
            ('Occitan', 'oc'), ('Pashto', 'ps'),
            ('Persian', 'fa'), ('Polish', 'pl'),
            ('Portuguese', 'pt'), ('Punjabi', 'pa'),
            ('Romanian', 'ro'), ('Russian', 'ru'),
            ('Sanskrit', 'sa'), ('Serbian', 'sr'),
            ('Shona', 'sn'), ('Sindhi', 'sd'),
            ('Sinhala', 'si'), ('Slovak', 'sk'),
            ('Slovenian', 'sl'), ('Somali', 'so'),
            ('Spanish', 'es'), ('Sundanese', 'su'),
            ('Swahili', 'sw'), ('Swedish', 'sv'),
            ('Tagalog', 'tl'), ('Tajik', 'tg'),
            ('Tamil', 'ta'), ('Tatar', 'tt'),
            ('Telugu', 'te'), ('Thai', 'th'),
            ('Tibetan', 'bo'), ('Turkish', 'tr'),
            ('Turkmen', 'tk'), ('Ukrainian', 'uk'),
            ('Urdu', 'ur'), ('Uzbek', 'uz'),
            ('Vietnamese', 'vi'), ('Welsh', 'cy'),
            ('Yiddish', 'yi'), ('Yoruba', 'yo'),
        )
    },
}
|
|
|
|
|
|
def _language_options_for_model(model_name):
|
|
"""Return (values, default, state) for the language combobox given a model name."""
|
|
if model_name.endswith('.en'):
|
|
return ['English (en)'], 'English (en)', 'disabled'
|
|
if model_name.startswith('KB Swedish'):
|
|
return ['Swedish (sv)'], 'Swedish (sv)', 'disabled'
|
|
return list(WHISPER_LANGUAGES.keys()), 'Auto-detect', 'readonly'
|
|
|
|
|
|
def _set_app_icon(root):
|
|
"""Set app icon when supported, without crashing on unsupported platforms."""
|
|
base_dir = os.path.dirname(os.path.abspath(__file__))
|
|
icon_path = os.path.join(base_dir, "images", "icon.ico")
|
|
|
|
if not os.path.exists(icon_path):
|
|
return
|
|
|
|
try:
|
|
root.iconbitmap(icon_path)
|
|
except tk.TclError:
|
|
# Some Linux Tk builds don't accept .ico for iconbitmap.
|
|
pass
|
|
|
|
|
|
def _apply_display_scaling(root):
|
|
"""Auto-scale UI for high-resolution displays (e.g., 4K)."""
|
|
try:
|
|
screen_w = root.winfo_screenwidth()
|
|
screen_h = root.winfo_screenheight()
|
|
scale = min(screen_w / 1920.0, screen_h / 1080.0)
|
|
scale = max(1.0, min(scale, 2.0))
|
|
customtkinter.set_widget_scaling(scale)
|
|
customtkinter.set_window_scaling(scale)
|
|
except Exception:
|
|
pass
|
|
|
|
class App:
    """Main window of the Local Transcribe GUI.

    Builds all widgets inside ``master``, redirects ``sys.stdout`` and
    ``sys.stderr`` into the embedded log panel, and runs each transcription
    in a separate ``multiprocessing.Process`` so that "Stop" can terminate it
    immediately.  The worker reports back over a one-way pipe which is polled
    from the Tk event loop every 100 ms.
    """

    def __init__(self, master):
        """Create the widgets and install the stdout/stderr redirection.

        Args:
            master: Window the widgets are packed into.  Its ``after`` and
                ``quit`` methods are used as well.
        """
        self.master = master
        # Change font
        font = ('Roboto', 13, 'bold')  # Change the font and size here
        font_b = ('Roboto', 12)  # Change the font and size here

        # Folder Path
        path_frame = customtkinter.CTkFrame(master)
        path_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(path_frame, text="Folder:", font=font).pack(side=tk.LEFT, padx=5)
        self.path_entry = customtkinter.CTkEntry(path_frame, width=50, font=font_b)
        self.path_entry.insert(0, os.path.join(os.getcwd(), 'sample_audio'))
        self.path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        customtkinter.CTkButton(path_frame, text="Browse", command=self.browse, font=font).pack(side=tk.LEFT, padx=5)

        # Language frame (initial contents match the default model, 'medium')
        language_frame = customtkinter.CTkFrame(master)
        language_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(language_frame, text="Language:", font=font).pack(side=tk.LEFT, padx=5)
        _lang_values, _lang_default, _lang_state = _language_options_for_model('medium')
        self.language_combobox = customtkinter.CTkComboBox(
            language_frame, width=50, state=_lang_state,
            values=_lang_values, font=font_b)
        self.language_combobox.set(_lang_default)
        self.language_combobox.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Model frame.  The line of '─' characters is a visual separator only;
        # start_transcription() refuses it as a selection.
        models = ['tiny', 'tiny.en', 'base', 'base.en',
                  'small', 'small.en', 'medium', 'medium.en',
                  'large-v2', 'large-v3',
                  '───────────────',
                  'KB Swedish (tiny)', 'KB Swedish (base)',
                  'KB Swedish (small)', 'KB Swedish (medium)',
                  'KB Swedish (large)']
        model_frame = customtkinter.CTkFrame(master)
        model_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(model_frame, text="Model:", font=font).pack(side=tk.LEFT, padx=5)
        # ComboBox frame
        self.model_combobox = customtkinter.CTkComboBox(
            model_frame, width=50, state="readonly",
            values=models, font=font_b,
            command=self._on_model_change)
        self.model_combobox.set('medium')  # Set the default value
        self.model_combobox.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Model description label
        self.model_desc_label = customtkinter.CTkLabel(
            master, text=self._model_desc_text('medium'),
            font=('Roboto', 11), text_color=('#555555', '#aaaaaa'),
            anchor='w')
        self.model_desc_label.pack(fill=tk.X, padx=14, pady=(0, 4))

        # Timestamps toggle
        ts_frame = customtkinter.CTkFrame(master)
        ts_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        self.timestamps_var = tk.BooleanVar(value=True)
        self.timestamps_switch = customtkinter.CTkSwitch(
            ts_frame, text="Include timestamps in transcription",
            variable=self.timestamps_var, font=font_b)
        self.timestamps_switch.pack(side=tk.LEFT, padx=5)

        # Advanced options frame
        adv_frame = customtkinter.CTkFrame(master)
        adv_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        self.vad_var = tk.BooleanVar(value=False)
        customtkinter.CTkSwitch(
            adv_frame, text="VAD filter (remove silence)",
            variable=self.vad_var, font=font_b).pack(side=tk.LEFT, padx=5)
        self.word_ts_var = tk.BooleanVar(value=False)
        customtkinter.CTkSwitch(
            adv_frame, text="Word-level timestamps",
            variable=self.word_ts_var, font=font_b).pack(side=tk.LEFT, padx=5)
        self.translate_var = tk.BooleanVar(value=False)
        customtkinter.CTkSwitch(
            adv_frame, text="Translate to English",
            variable=self.translate_var, font=font_b).pack(side=tk.LEFT, padx=5)

        # Progress Bar (created here, packed only while a transcription runs)
        self.progress_bar = ttk.Progressbar(master, length=200, mode='indeterminate')

        # Worker process handle (replaces thread+stop_event for true immediate cancellation)
        self._proc = None
        self._parent_conn = None
        self._child_conn = None

        # Button actions frame
        button_frame = customtkinter.CTkFrame(master)
        button_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        self.transcribe_button = customtkinter.CTkButton(button_frame, text="Transcribe", command=self.start_transcription, font=font)
        self.transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        self.stop_button = customtkinter.CTkButton(
            button_frame, text="Stop", command=self._stop_transcription, font=font,
            fg_color="#c0392b", hover_color="#922b21", state=tk.DISABLED)
        self.stop_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        customtkinter.CTkButton(button_frame, text="Quit", command=master.quit, font=font).pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

        # ── Embedded console / log panel ──────────────────────────────────
        log_label = customtkinter.CTkLabel(master, text="Console output", font=font, anchor='w')
        log_label.pack(fill=tk.X, padx=12, pady=(8, 0))
        self.log_box = customtkinter.CTkTextbox(master, height=220, font=('Consolas', 14),
                                                wrap='word', state='disabled',
                                                fg_color='#1e1e1e', text_color='#e0e0e0')
        self.log_box.pack(fill=tk.BOTH, expand=True, padx=10, pady=(2, 10))

        # Redirect stdout & stderr into the log panel (no backend console).
        # NOTE: this replaces the process-wide streams and is never undone.
        sys.stdout = _ConsoleRedirector(self.log_box)
        sys.stderr = _ConsoleRedirector(self.log_box)

        # Backend indicator
        _bi = detect_backend()
        backend_label = customtkinter.CTkLabel(
            master,
            text=f"Backend: {_bi['label']}",
            font=('Roboto', 11),
            text_color=("#555555", "#aaaaaa"),
            anchor='e',
        )
        backend_label.pack(fill=tk.X, padx=12, pady=(0, 2))

        # Welcome message (shown after redirect so it appears in the panel)
        print("Welcome to Local Transcribe with Whisper! \U0001f600")
        print("Transcriptions will be saved automatically.")
        print("─" * 46)

    # Helper functions
    def _close_connections(self):
        """Close whichever pipe ends are still open and forget them.

        Best-effort: errors raised while closing are ignored, because the
        other end may already be gone.
        """
        for conn in (self._parent_conn, self._child_conn):
            try:
                if conn:
                    conn.close()
            except Exception:
                pass
        self._parent_conn = self._child_conn = None

    def _stop_transcription(self):
        """Terminate the running worker process (the "Stop" button callback).

        Sends ``terminate()``, waits up to 3 s, and escalates to ``kill()``
        (waiting up to 1 more second) when the process is still alive.  The
        pipe ends are closed afterwards.  The remaining UI reset is left to
        the next ``_poll_worker`` tick, which sees that the process is dead.
        """
        self.stop_button.configure(state=tk.DISABLED)
        if self._proc and self._proc.is_alive():
            self._proc.terminate()
            try:
                self._proc.join(timeout=3)
            except Exception:
                pass
            if self._proc.is_alive():
                self._proc.kill()
                try:
                    self._proc.join(timeout=1)
                except Exception:
                    pass
        # Close pipe ends — no semaphores, so no leak
        self._close_connections()
        print("⛔ Transcription stopped by user.")

    def _model_desc_text(self, model_name):
        """Return the one-line description for ``model_name``.

        The result is ``''`` when the name has no entry in ``MODEL_INFO``
        (this is the case for the separator row of the model list).
        """
        info = MODEL_INFO.get(model_name)
        if not info:
            return ''
        speed, size, stars, use = info
        return f'{stars} {speed} · {size} · {use}'

    def _on_model_change(self, selected):
        """Refresh the description label and language choices for a new model.

        Args:
            selected: The value picked in the model combobox.
        """
        self.model_desc_label.configure(text=self._model_desc_text(selected))
        values, default, state = _language_options_for_model(selected)
        self.language_combobox.configure(values=values, state=state)
        self.language_combobox.set(default)

    # Browsing
    def browse(self):
        """Let the user pick a folder and put it into the path entry.

        Cancelling the dialog leaves the current entry untouched.
        """
        initial_dir = os.getcwd()
        folder_path = filedialog.askdirectory(initialdir=initial_dir)
        if not folder_path:
            # Dialog dismissed: an empty result must not wipe the existing path.
            return
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    # Start transcription
    def start_transcription(self):
        """Collect the current settings and launch the worker process.

        All inputs are gathered and resolved before the UI is switched into
        its "running" state, so a failure there leaves the buttons usable.
        If the worker cannot be started, the UI is reset and the exception is
        re-raised.
        """
        model_display = self.model_combobox.get()
        if model_display.startswith('─'):
            messagebox.showinfo("Invalid selection", "Please select a model, not the separator line.")
            return

        path = self.path_entry.get()
        # KB Swedish labels map to HuggingFace ids; other names pass through unchanged.
        model = HF_MODEL_MAP.get(model_display, model_display)
        lang_label = self.language_combobox.get()
        language = WHISPER_LANGUAGES.get(lang_label, lang_label) if lang_label else None
        timestamps = self.timestamps_var.get()
        vad_filter = self.vad_var.get()
        word_timestamps = self.word_ts_var.get()
        translate = self.translate_var.get()
        # Done before any widget state changes: if this raises, nothing has
        # been disabled yet and the user can simply correct the path.
        glob_file = get_path(path)

        self.transcribe_button.configure(state=tk.DISABLED)
        self.stop_button.configure(state=tk.NORMAL)
        self.progress_bar.pack(fill=tk.X, padx=5, pady=5)
        self.progress_bar.start()
        try:
            self._parent_conn, self._child_conn = mp.Pipe(duplex=False)
            self._proc = mp.Process(
                target=_transcribe_worker_process,
                # NOTE(review): the sixth positional argument is a hard-coded
                # True — presumably a verbosity flag; confirm against
                # _transcribe_worker_process.
                args=(self._child_conn, path, glob_file, model, language, True, timestamps),
                kwargs={"vad_filter": vad_filter, "word_timestamps": word_timestamps, "translate": translate},
                daemon=True,
            )
            self._proc.start()
        except Exception:
            # The worker never started: undo the "running" state so the
            # window stays usable, then let the error surface as before.
            self._close_connections()
            self._on_transcription_done(None)
            raise
        self._child_conn.close()  # parent doesn't write; close its write-end
        self._child_conn = None
        self.master.after(100, self._poll_worker)

    def _poll_worker(self):
        """Drain pending worker messages; finish up or schedule the next poll.

        A message that is a tuple starting with ``'__done__'`` ends the run
        and carries the final result in its second item; every other message
        is written to ``sys.stdout``.  The run also ends when the pipe reports
        EOF or when the worker process is no longer alive.
        """
        # Sample liveness BEFORE draining: whatever a dead worker managed to
        # send is already in the pipe at this point, so its final message
        # cannot slip in between the drain and the liveness check.
        worker_exited = self._proc is not None and not self._proc.is_alive()
        done = False
        result = None
        try:
            while self._parent_conn and self._parent_conn.poll():
                msg = self._parent_conn.recv()
                if isinstance(msg, tuple) and msg[0] == '__done__':
                    done = True
                    result = msg[1]
                else:
                    sys.stdout.write(msg)
                    sys.stdout.flush()
        except EOFError:
            # Child closed the pipe (normal completion or kill)
            done = True
        except Exception:
            # Best-effort: a message that cannot be read or shown must not
            # stop the polling; the liveness check still ends the run.
            pass

        if done or worker_exited:
            self._close_connections()
            self._on_transcription_done(result)
        else:
            self.master.after(100, self._poll_worker)

    def _on_transcription_done(self, output_text):
        """Return the UI to its idle state and show the final message, if any.

        Args:
            output_text: Final message from the worker, or ``None`` when there
                is nothing to show.  A message starting with ``'⚠'`` is
                presented as an error dialog, anything else as "Finished!".
        """
        self.progress_bar.stop()
        self.progress_bar.pack_forget()
        self.stop_button.configure(state=tk.DISABLED)
        self.transcribe_button.configure(state=tk.NORMAL)
        if output_text:
            if output_text.startswith('⚠'):
                messagebox.showerror("Error", output_text)
            else:
                messagebox.showinfo("Finished!", output_text)
|
|
|
|
if __name__ == "__main__":
    # Create the root window first: the scaling helper needs it to query the
    # screen size.
    root = customtkinter.CTk()
    _apply_display_scaling(root)
    root.title("Local Transcribe with Whisper")

    # Initial size leaves room for the embedded console panel; the window may
    # not be shrunk below 450x480.
    width, height = 550, 560
    root.geometry(f'{width}x{height}')
    root.minsize(450, 480)

    # Best-effort icon; silently skipped where .ico files are not supported.
    _set_app_icon(root)

    # Build the UI and hand control to the Tk event loop.
    app = App(root)
    root.mainloop()
|