feat: implement multiprocessing for transcription with immediate cancellation

This commit is contained in:
2026-04-05 22:11:13 +02:00
parent e29572420e
commit 8d5c8d6097
2 changed files with 112 additions and 32 deletions

104
app.py
View File

@@ -4,7 +4,8 @@ import tkinter as tk
from tkinter import ttk from tkinter import ttk
from tkinter import filedialog from tkinter import filedialog
from tkinter import messagebox from tkinter import messagebox
from src._LocalTranscribe import transcribe, get_path, detect_backend from src._LocalTranscribe import transcribe, get_path, detect_backend, _transcribe_worker_process
import multiprocessing as mp
import customtkinter import customtkinter
import threading import threading
@@ -220,8 +221,10 @@ class App:
self.timestamps_switch.pack(side=tk.LEFT, padx=5) self.timestamps_switch.pack(side=tk.LEFT, padx=5)
# Progress Bar # Progress Bar
self.progress_bar = ttk.Progressbar(master, length=200, mode='indeterminate') self.progress_bar = ttk.Progressbar(master, length=200, mode='indeterminate')
# Stop event for cancellation # Worker process handle (replaces thread+stop_event for true immediate cancellation)
self._stop_event = threading.Event() self._proc = None
self._parent_conn = None
self._child_conn = None
# Button actions frame # Button actions frame
button_frame = customtkinter.CTkFrame(master) button_frame = customtkinter.CTkFrame(master)
button_frame.pack(fill=tk.BOTH, padx=10, pady=10) button_frame.pack(fill=tk.BOTH, padx=10, pady=10)
@@ -262,9 +265,28 @@ class App:
print("" * 46) print("" * 46)
# Helper functions # Helper functions
def _stop_transcription(self):
    """Stop the running worker process (if any) and release the pipe ends.

    Disables the stop button, asks the worker to terminate and, if it is
    still alive after the grace period, kills it. Both pipe connections
    held by this object are then closed and cleared.
    """
    self.stop_button.configure(state=tk.DISABLED)

    worker = self._proc
    if worker:
        # Escalate: polite terminate first (3 s grace), hard kill second (1 s).
        for method_name, grace_seconds in (("terminate", 3), ("kill", 1)):
            if not worker.is_alive():
                break
            getattr(worker, method_name)()
            try:
                worker.join(timeout=grace_seconds)
            except Exception:
                pass

    # Close whichever pipe ends are still open; closing is best-effort.
    for endpoint in (self._parent_conn, self._child_conn):
        try:
            if endpoint:
                endpoint.close()
        except Exception:
            pass
    self._parent_conn = None
    self._child_conn = None

    print("⛔ Transcription stopped by user.")
def _model_desc_text(self, model_name): def _model_desc_text(self, model_name):
info = MODEL_INFO.get(model_name) info = MODEL_INFO.get(model_name)
@@ -287,49 +309,67 @@ class App:
self.path_entry.insert(0, folder_path) self.path_entry.insert(0, folder_path)
# Start transcription # Start transcription
def start_transcription(self): def start_transcription(self):
self._stop_event.clear()
self.transcribe_button.configure(state=tk.DISABLED)
self.stop_button.configure(state=tk.NORMAL)
threading.Thread(target=self.transcribe_thread, daemon=True).start()
# Threading
def transcribe_thread(self):
path = self.path_entry.get()
model_display = self.model_combobox.get() model_display = self.model_combobox.get()
# Ignore the visual separator
if model_display.startswith(''): if model_display.startswith(''):
messagebox.showinfo("Invalid selection", "Please select a model, not the separator line.") messagebox.showinfo("Invalid selection", "Please select a model, not the separator line.")
self.transcribe_button.configure(state=tk.NORMAL)
return return
self.transcribe_button.configure(state=tk.DISABLED)
self.stop_button.configure(state=tk.NORMAL)
path = self.path_entry.get()
model = HF_MODEL_MAP.get(model_display, model_display) model = HF_MODEL_MAP.get(model_display, model_display)
lang_label = self.language_combobox.get() lang_label = self.language_combobox.get()
language = WHISPER_LANGUAGES.get(lang_label, lang_label) if lang_label else None language = WHISPER_LANGUAGES.get(lang_label, lang_label) if lang_label else None
verbose = True # always show transcription progress in the console panel
timestamps = self.timestamps_var.get() timestamps = self.timestamps_var.get()
# Show progress bar glob_file = get_path(path)
self.progress_bar.pack(fill=tk.X, padx=5, pady=5) self.progress_bar.pack(fill=tk.X, padx=5, pady=5)
self.progress_bar.start() self.progress_bar.start()
# Setting path and files self._parent_conn, self._child_conn = mp.Pipe(duplex=False)
glob_file = get_path(path) self._proc = mp.Process(
#messagebox.showinfo("Message", "Starting transcription!") target=_transcribe_worker_process,
# Start transcription args=(self._child_conn, path, glob_file, model, language, True, timestamps),
daemon=True,
)
self._proc.start()
self._child_conn.close() # parent doesn't write; close its write-end
self._child_conn = None
self.master.after(100, self._poll_worker)
def _poll_worker(self):
    """Drain pending worker output, then finish up or re-arm the poll.

    Every message waiting on the parent end of the pipe is read. A tuple
    whose first item is '__done__' carries the final result; anything
    else is written to sys.stdout. When the final message has arrived,
    the pipe hit EOF, or the worker process had already exited, the pipe
    is closed and _on_transcription_done(result) is called. Otherwise
    this method reschedules itself via self.master.after in 100 ms.
    """
    # Sample liveness BEFORE draining. If the worker was already dead at
    # this point, everything it ever sent is in the pipe and the drain
    # below will see it. Checking after the drain (as before) could drop
    # the final '__done__' message when the worker sent it and exited
    # between the last poll() and the is_alive() call.
    worker_exited = bool(self._proc) and not self._proc.is_alive()

    done = False
    result = None
    try:
        while self._parent_conn and self._parent_conn.poll():
            msg = self._parent_conn.recv()
            if isinstance(msg, tuple) and msg[0] == '__done__':
                done = True
                result = msg[1]
            else:
                sys.stdout.write(msg)
                sys.stdout.flush()
    except EOFError:
        # The other end of the pipe is closed (normal completion or kill).
        done = True
    except Exception:
        # Best-effort: a bad message must not break the UI polling loop;
        # anything still queued is picked up on the next tick.
        pass

    if done or worker_exited:
        if self._parent_conn:
            try:
                self._parent_conn.close()
            except Exception:
                pass
            self._parent_conn = None
        self._on_transcription_done(result)
    else:
        self.master.after(100, self._poll_worker)
def _on_transcription_done(self, output_text):
self.progress_bar.stop() self.progress_bar.stop()
self.progress_bar.pack_forget() self.progress_bar.pack_forget()
# Restore buttons
self.stop_button.configure(state=tk.DISABLED) self.stop_button.configure(state=tk.DISABLED)
self.transcribe_button.configure(state=tk.NORMAL) self.transcribe_button.configure(state=tk.NORMAL)
# Recover output text if output_text:
try: title = "Finished!" if not output_text.startswith('') else "Error"
messagebox.showinfo("Finished!", output_text) messagebox.showinfo(title, output_text)
except UnboundLocalError:
pass
if __name__ == "__main__": if __name__ == "__main__":
# Setting custom themes # Setting custom themes

View File

@@ -424,3 +424,43 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False, timest
print(output_text) print(output_text)
print(SEP) print(SEP)
return output_text return output_text
def _transcribe_worker_process(conn, path, glob_file, model, language, verbose, timestamps):
    """Child-process entry point for the UI's multiprocessing backend.

    Replaces sys.stdout and sys.stderr with a writer that forwards every
    text chunk over *conn*, runs transcribe(), and finally sends the tuple
    ('__done__', result) before closing the connection. The parent is
    expected to stop this process by terminating or killing it, which also
    interrupts any in-progress download or inference.

    Args:
        conn: Write end of a multiprocessing pipe shared with the parent.
        path, glob_file, model, language, verbose, timestamps: Passed
            through unchanged to transcribe().
    """
    import sys

    class _PipeWriter:
        """Minimal write-only text stream that forwards chunks over a pipe."""

        def __init__(self, c):
            self.c = c

        def write(self, text):
            """Send *text* to the parent; return the number of characters."""
            if not text:
                return 0
            try:
                self.c.send(text)
            except Exception:
                # Best-effort: if the parent has gone away, losing console
                # output must not abort the transcription itself.
                pass
            # File-like write() reports how many characters were consumed.
            return len(text)

        def flush(self):
            # Each write is sent immediately, so there is nothing to flush.
            pass

        def isatty(self):
            # Code that probes the stream (progress bars, colour output)
            # may call this; a pipe is never a terminal.
            return False

        def writable(self):
            return True

    writer = _PipeWriter(conn)
    sys.stdout = writer
    sys.stderr = writer

    result = '⚠ No output produced.'
    try:
        result = transcribe(path, glob_file, model, language, verbose, timestamps)
    except Exception as exc:
        result = f'⚠ Unexpected error: {exc}'
    finally:
        try:
            conn.send(('__done__', result))
        except Exception:
            # The parent may already have closed its end; nothing to report to.
            pass
        conn.close()