From 58255c3d109aaeb842c9adfbc30af47895e487f0 Mon Sep 17 00:00:00 2001 From: soderstromkr <23003509+soderstromkr@users.noreply.github.com> Date: Mon, 2 Mar 2026 21:49:32 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20Linux/Ubuntu=20support=20=E2=80=94=20ico?= =?UTF-8?q?n=20fallback,=20HiDPI=20scaling,=20CUDA=20lib=20paths,=20per-fi?= =?UTF-8?q?le=20timing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - app.py: graceful icon loading (no crash on Linux Tk without .ico support) - app.py: auto-detect display scaling for 4K/HiDPI screens - _LocalTranscribe.py: register NVIDIA pip-package .so paths on Linux (LD_LIBRARY_PATH) so faster-whisper finds libcublas/libcudnn at runtime - _LocalTranscribe.py: auto-fallback to CPU if CUDA runtime libs missing - _LocalTranscribe.py: filter input to supported media extensions only - _LocalTranscribe.py: show real decode errors instead of generic skip message - _LocalTranscribe.py: per-file timer showing wall-clock vs audio duration --- app.py | 33 +++++++++- src/_LocalTranscribe.py | 129 ++++++++++++++++++++++++++++++---------- 2 files changed, 128 insertions(+), 34 deletions(-) diff --git a/app.py b/app.py index bcaefb3..840396f 100644 --- a/app.py +++ b/app.py @@ -52,6 +52,34 @@ customtkinter.set_appearance_mode("System") customtkinter.set_default_color_theme("blue") # Themes: blue (default), dark-blue, green firstclick = True + +def _set_app_icon(root): + """Set app icon when supported, without crashing on unsupported platforms.""" + base_dir = os.path.dirname(os.path.abspath(__file__)) + icon_path = os.path.join(base_dir, "images", "icon.ico") + + if not os.path.exists(icon_path): + return + + try: + root.iconbitmap(icon_path) + except tk.TclError: + # Some Linux Tk builds don't accept .ico for iconbitmap. + pass + + +def _apply_display_scaling(root): + """Auto-scale UI for high-resolution displays (e.g., 4K).""" + try: + screen_w = root.winfo_screenwidth() + screen_h = root.winfo_screenheight() + scale = min(screen_w / 1920.0, screen_h / 1080.0) + scale = max(1.0, min(scale, 2.0)) + customtkinter.set_widget_scaling(scale) + customtkinter.set_window_scaling(scale) + except Exception: + pass + class App: def __init__(self, master): self.master = master @@ -184,13 +212,14 @@ class App: if __name__ == "__main__": # Setting custom themes root = customtkinter.CTk() + _apply_display_scaling(root) root.title("Local Transcribe with Whisper") # Geometry — taller to accommodate the embedded console panel width, height = 550, 560 root.geometry('{}x{}'.format(width, height)) root.minsize(450, 480) - # Icon - root.iconbitmap('images/icon.ico') + # Icon (best-effort; ignored on platforms/builds without .ico support) + _set_app_icon(root) # Run app = App(root) root.mainloop() diff --git a/src/_LocalTranscribe.py b/src/_LocalTranscribe.py index 9279607..296df02 100644 --- a/src/_LocalTranscribe.py +++ b/src/_LocalTranscribe.py @@ -1,46 +1,71 @@ import os import sys import datetime +import time import site from glob import glob # --------------------------------------------------------------------------- # CUDA setup — must happen before importing faster_whisper / ctranslate2 # --------------------------------------------------------------------------- -def _setup_cuda_dlls(): - """Add NVIDIA pip-package DLL dirs to the DLL search path (Windows only). +def _setup_cuda_libs(): + """Register NVIDIA pip-package lib dirs so ctranslate2 finds CUDA at runtime. - pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their .dll - files inside the site-packages tree. Python 3.8+ on Windows does NOT - search PATH for DLLs loaded via ctypes/LoadLibrary, so we must - explicitly register every nvidia/*/bin and nvidia/*/lib directory using - os.add_dll_directory *and* prepend them to PATH (some native extensions - still rely on PATH). + pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their shared + libraries inside the site-packages tree. Neither Windows nor Linux + automatically search those directories, so we must register them + explicitly: + - Windows: os.add_dll_directory() + PATH + - Linux: LD_LIBRARY_PATH (read by the dynamic linker) """ - if sys.platform != "win32": - return try: - for sp in site.getsitepackages(): - nvidia_root = os.path.join(sp, "nvidia") - if not os.path.isdir(nvidia_root): - continue - for pkg in os.listdir(nvidia_root): - for sub in ("bin", "lib"): - d = os.path.join(nvidia_root, pkg, sub) - if os.path.isdir(d): - os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "") - try: - os.add_dll_directory(d) - except (OSError, AttributeError): - pass - except Exception: - pass + sp_dirs = site.getsitepackages() + except AttributeError: + # virtualenv without site-packages helper + sp_dirs = [os.path.join(sys.prefix, "lib", + "python" + ".".join(map(str, sys.version_info[:2])), + "site-packages")] -_setup_cuda_dlls() + for sp in sp_dirs: + nvidia_root = os.path.join(sp, "nvidia") + if not os.path.isdir(nvidia_root): + continue + for pkg in os.listdir(nvidia_root): + for sub in ("bin", "lib"): + d = os.path.join(nvidia_root, pkg, sub) + if not os.path.isdir(d): + continue + if sys.platform == "win32": + os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "") + try: + os.add_dll_directory(d) + except (OSError, AttributeError): + pass + else: + # Linux / macOS — prepend to LD_LIBRARY_PATH + ld = os.environ.get("LD_LIBRARY_PATH", "") + if d not in ld: + os.environ["LD_LIBRARY_PATH"] = d + (":" + ld if ld else "") + # Also load via ctypes so already-started process sees it + import ctypes + try: + for so in sorted(os.listdir(d)): + if so.endswith(".so") or ".so." in so: + ctypes.cdll.LoadLibrary(os.path.join(d, so)) + except OSError: + pass + +_setup_cuda_libs() from faster_whisper import WhisperModel +SUPPORTED_EXTENSIONS = { + ".wav", ".mp3", ".m4a", ".flac", ".ogg", ".wma", ".aac", + ".mp4", ".mkv", ".mov", ".webm", ".avi", ".mpeg", ".mpg", +} + + def _detect_device(): """Return (device, compute_type) for the best available backend.""" try: @@ -55,8 +80,15 @@ def _detect_device(): # Get the path def get_path(path): - glob_file = glob(path + '/*') - return glob_file + all_items = glob(path + '/*') + media_files = [] + for item in all_items: + if not os.path.isfile(item): + continue + _, ext = os.path.splitext(item) + if ext.lower() in SUPPORTED_EXTENSIONS: + media_files.append(item) + return sorted(media_files) # Main function def transcribe(path, glob_file, model=None, language=None, verbose=False): @@ -95,15 +127,40 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False): # ── Step 2: Load model ─────────────────────────────────────────── print(f"⏳ Loading model '{model}' — downloading if needed...") - whisper_model = WhisperModel(model, device=device, compute_type=compute_type) + try: + whisper_model = WhisperModel(model, device=device, compute_type=compute_type) + except Exception as exc: + err = str(exc).lower() + cuda_runtime_missing = ( + device == "cuda" + and ( + "libcublas" in err + or "libcudnn" in err + or "cuda" in err + or "cannot be loaded" in err + or "not found" in err + ) + ) + if not cuda_runtime_missing: + raise + print("⚠ CUDA runtime not available; falling back to CPU (int8).") + print(f" Reason: {exc}") + device, compute_type = "cpu", "int8" + whisper_model = WhisperModel(model, device=device, compute_type=compute_type) print("✅ Model ready!") print(SEP) # ── Step 3: Transcribe files ───────────────────────────────────── total_files = len(glob_file) - print(f"📂 Found {total_files} item(s) in folder") + print(f"📂 Found {total_files} supported media file(s) in folder") print(SEP) + if total_files == 0: + output_text = '⚠ No supported media files found — try another folder.' + print(output_text) + print(SEP) + return output_text + files_transcripted = [] file_num = 0 for file in glob_file: @@ -112,11 +169,13 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False): print(f"\n{'─' * 46}") print(f"📄 File {file_num}/{total_files}: {title}") try: + t_start = time.time() segments, info = whisper_model.transcribe( file, language=language, beam_size=5 ) + audio_duration = info.duration # seconds # Make folder if missing os.makedirs('{}/transcriptions'.format(path), exist_ok=True) # Stream segments as they are decoded @@ -133,10 +192,16 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False): else: print(" Transcribed up to %.0fs..." % seg.end, end='\r') segment_list.append(seg) + elapsed = time.time() - t_start + elapsed_min = elapsed / 60.0 + audio_min = audio_duration / 60.0 + ratio = audio_duration / elapsed if elapsed > 0 else float('inf') print(f"✅ Done — saved to transcriptions/{title}.txt") + print(f"⏱ Transcribed {audio_min:.1f} min of audio in {elapsed_min:.1f} min ({ratio:.1f}x realtime)") files_transcripted.append(segment_list) - except Exception: - print('⚠ Not a valid audio/video file, skipping.') + except Exception as exc: + print(f"⚠ Could not decode '{os.path.basename(file)}', skipping.") + print(f" Reason: {exc}") # ── Summary ────────────────────────────────────────────────────── print(f"\n{SEP}")