From 58255c3d109aaeb842c9adfbc30af47895e487f0 Mon Sep 17 00:00:00 2001
From: soderstromkr <23003509+soderstromkr@users.noreply.github.com>
Date: Mon, 2 Mar 2026 21:49:32 +0100
Subject: [PATCH] =?UTF-8?q?fix:=20Linux/Ubuntu=20support=20=E2=80=94=20ico?=
 =?UTF-8?q?n=20fallback,=20HiDPI=20scaling,=20CUDA=20lib=20paths,=20per-fi?=
 =?UTF-8?q?le=20timing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- app.py: graceful icon loading (no crash on Linux Tk without .ico support)
- app.py: auto-detect display scaling for 4K/HiDPI screens
- _LocalTranscribe.py: register NVIDIA pip-package .so paths on Linux (LD_LIBRARY_PATH)
  so faster-whisper finds libcublas/libcudnn at runtime
- _LocalTranscribe.py: auto-fallback to CPU if CUDA runtime libs missing
- _LocalTranscribe.py: filter input to supported media extensions only
- _LocalTranscribe.py: show real decode errors instead of generic skip message
- _LocalTranscribe.py: per-file timer showing wall-clock vs audio duration
---
 app.py                  |  33 +++++++++-
 src/_LocalTranscribe.py | 129 ++++++++++++++++++++++++++++++----------
 2 files changed, 128 insertions(+), 34 deletions(-)

diff --git a/app.py b/app.py
index bcaefb3..840396f 100644
--- a/app.py
+++ b/app.py
@@ -52,6 +52,34 @@ customtkinter.set_appearance_mode("System")
 customtkinter.set_default_color_theme("blue")  # Themes: blue (default), dark-blue, green
 firstclick = True
 
+
+def _set_app_icon(root):
+    """Set the window icon from images/icon.ico next to this file, if Tk accepts it."""
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    icon_path = os.path.join(base_dir, "images", "icon.ico")
+    # Nothing to set when the icon file is not present beside this script.
+    if not os.path.exists(icon_path):
+        return
+    # Best-effort from here on: a rejected icon must not stop the app from starting.
+    try:
+        root.iconbitmap(icon_path)
+    except tk.TclError:
+        # Some Linux Tk builds don't accept .ico for iconbitmap.
+        pass
+
+
+def _apply_display_scaling(root):
+    """Scale the UI on X11 by screen size vs 1920x1080, clamped to 1.0-2.0."""
+    try:
+        if root.tk.call("tk", "windowingsystem") != "x11":
+            return  # Windows/macOS already scale for HiDPI; don't compound it
+        w, h = root.winfo_screenwidth(), root.winfo_screenheight()
+        scale = max(1.0, min(w / 1920.0, h / 1080.0, 2.0))
+        customtkinter.set_widget_scaling(scale)
+        customtkinter.set_window_scaling(scale)
+    except Exception:  # best-effort: scaling problems must not block startup
+        pass
+
 class App:
     def __init__(self, master):
         self.master = master
@@ -184,13 +212,14 @@ class App:
 if __name__ == "__main__":
     # Setting custom themes
     root = customtkinter.CTk()
+    _apply_display_scaling(root)
     root.title("Local Transcribe with Whisper")
     # Geometry — taller to accommodate the embedded console panel
     width, height = 550, 560
     root.geometry('{}x{}'.format(width, height))
     root.minsize(450, 480)
-    # Icon 
-    root.iconbitmap('images/icon.ico')
+    # Icon (best-effort; ignored on platforms/builds without .ico support)
+    _set_app_icon(root)
     # Run
     app = App(root)
     root.mainloop()
diff --git a/src/_LocalTranscribe.py b/src/_LocalTranscribe.py
index 9279607..296df02 100644
--- a/src/_LocalTranscribe.py
+++ b/src/_LocalTranscribe.py
@@ -1,46 +1,71 @@
 import os
 import sys
 import datetime
+import time
 import site
 from glob import glob
 
 # ---------------------------------------------------------------------------
 # CUDA setup — must happen before importing faster_whisper / ctranslate2
 # ---------------------------------------------------------------------------
-def _setup_cuda_dlls():
-    """Add NVIDIA pip-package DLL dirs to the DLL search path (Windows only).
+def _setup_cuda_libs():
+    """Register NVIDIA pip-package lib dirs so ctranslate2 finds CUDA at runtime.
 
-    pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their .dll
-    files inside the site-packages tree.  Python 3.8+ on Windows does NOT
-    search PATH for DLLs loaded via ctypes/LoadLibrary, so we must
-    explicitly register every nvidia/*/bin and nvidia/*/lib directory using
-    os.add_dll_directory *and* prepend them to PATH (some native extensions
-    still rely on PATH).
+    pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their shared
+    libraries inside the site-packages tree.  Neither Windows nor Linux
+    searches those directories automatically, so every existing
+    nvidia/*/bin and nvidia/*/lib directory is registered explicitly:
+      - Windows: os.add_dll_directory() + PATH
+      - Others:  LD_LIBRARY_PATH (for child processes) + ctypes preload of *.so*
     """
-    if sys.platform != "win32":
-        return
     try:
-        for sp in site.getsitepackages():
-            nvidia_root = os.path.join(sp, "nvidia")
-            if not os.path.isdir(nvidia_root):
-                continue
-            for pkg in os.listdir(nvidia_root):
-                for sub in ("bin", "lib"):
-                    d = os.path.join(nvidia_root, pkg, sub)
-                    if os.path.isdir(d):
-                        os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "")
-                        try:
-                            os.add_dll_directory(d)
-                        except (OSError, AttributeError):
-                            pass
-    except Exception:
-        pass
+        sp_dirs = site.getsitepackages()
+    except AttributeError:
+        # Some virtualenv site modules lack getsitepackages(); guess the POSIX layout
+        sp_dirs = [os.path.join(sys.prefix, "lib",
+                                "python" + ".".join(map(str, sys.version_info[:2])),
+                                "site-packages")]
+
-_setup_cuda_dlls()
+    for sp in sp_dirs:
+        nvidia_root = os.path.join(sp, "nvidia")
+        if not os.path.isdir(nvidia_root):
+            continue
+        for pkg in os.listdir(nvidia_root):
+            for sub in ("bin", "lib"):
+                d = os.path.join(nvidia_root, pkg, sub)
+                if not os.path.isdir(d):
+                    continue
+                if sys.platform == "win32":
+                    os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "")
+                    try:
+                        os.add_dll_directory(d)
+                    except (OSError, AttributeError):
+                        pass
+                else:
+                    # Non-Windows: the loader reads LD_LIBRARY_PATH at process start, so
+                    # exporting it only helps children; preload for this process too.
+                    ld = os.environ.get("LD_LIBRARY_PATH", "")
+                    if d not in ld.split(":"):  # whole-entry match, not substring
+                        os.environ["LD_LIBRARY_PATH"] = d + (":" + ld if ld else "")
+                        import contextlib
+                        import ctypes
+                        with contextlib.suppress(OSError):  # unreadable directory
+                            for so in sorted(os.listdir(d)):
+                                if so.endswith(".so") or ".so." in so:
+                                    with contextlib.suppress(OSError):  # skip just this lib
+                                        ctypes.cdll.LoadLibrary(os.path.join(d, so))
+
+_setup_cuda_libs()
 
 from faster_whisper import WhisperModel
 
 
+SUPPORTED_EXTENSIONS = {
+    ".wav", ".mp3", ".m4a", ".flac", ".ogg", ".wma", ".aac",
+    ".mp4", ".mkv", ".mov", ".webm", ".avi", ".mpeg", ".mpg",
+}
+
+
 def _detect_device():
     """Return (device, compute_type) for the best available backend."""
     try:
@@ -55,8 +80,15 @@ def _detect_device():
 
 # Get the path
 def get_path(path):
-    glob_file = glob(path + '/*')
-    return glob_file
+    """Return the sorted supported media files located directly inside *path*."""
+    from glob import escape  # only glob() itself is imported at module level
+    # Escape the folder so names like "Talks [2024]" aren't read as glob patterns.
+    candidates = glob(escape(path) + '/*')
+    return sorted(
+        item for item in candidates
+        if os.path.isfile(item)
+        and os.path.splitext(item)[1].lower() in SUPPORTED_EXTENSIONS
+    )
 
 # Main function
 def transcribe(path, glob_file, model=None, language=None, verbose=False):
@@ -95,15 +127,40 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False):
 
     # ── Step 2: Load model ───────────────────────────────────────────
     print(f"⏳ Loading model '{model}' — downloading if needed...")
-    whisper_model = WhisperModel(model, device=device, compute_type=compute_type)
+    try:
+        whisper_model = WhisperModel(model, device=device, compute_type=compute_type)
+    except Exception as exc:
+        err = str(exc).lower()
+        cuda_runtime_missing = (
+            device == "cuda"
+            and (
+                "libcublas" in err
+                or "libcudnn" in err
+                or "cuda" in err
+                or "cannot be loaded" in err
+                or "not found" in err
+            )
+        )
+        if not cuda_runtime_missing:
+            raise
+        print("⚠  CUDA runtime not available; falling back to CPU (int8).")
+        print(f"   Reason: {exc}")
+        device, compute_type = "cpu", "int8"
+        whisper_model = WhisperModel(model, device=device, compute_type=compute_type)
     print("✅ Model ready!")
     print(SEP)
 
     # ── Step 3: Transcribe files ─────────────────────────────────────
     total_files = len(glob_file)
-    print(f"📂 Found {total_files} item(s) in folder")
+    print(f"📂 Found {total_files} supported media file(s) in folder")
     print(SEP)
 
+    if total_files == 0:
+        output_text = '⚠  No supported media files found — try another folder.'
+        print(output_text)
+        print(SEP)
+        return output_text
+
     files_transcripted = []
     file_num = 0
     for file in glob_file:
@@ -112,11 +169,13 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False):
         print(f"\n{'─' * 46}")
         print(f"📄 File {file_num}/{total_files}: {title}")
         try:
+            t_start = time.time()
             segments, info = whisper_model.transcribe(
                 file,
                 language=language,
                 beam_size=5
             )
+            audio_duration = info.duration  # seconds
             # Make folder if missing
             os.makedirs('{}/transcriptions'.format(path), exist_ok=True)
             # Stream segments as they are decoded
@@ -133,10 +192,16 @@ def transcribe(path, glob_file, model=None, language=None, verbose=False):
                     else:
                         print("   Transcribed up to %.0fs..." % seg.end, end='\r')
                     segment_list.append(seg)
+            elapsed = time.time() - t_start
+            elapsed_min = elapsed / 60.0
+            audio_min = audio_duration / 60.0
+            ratio = audio_duration / elapsed if elapsed > 0 else float('inf')
             print(f"✅ Done — saved to transcriptions/{title}.txt")
+            print(f"⏱  Transcribed {audio_min:.1f} min of audio in {elapsed_min:.1f} min  ({ratio:.1f}x realtime)")
             files_transcripted.append(segment_list)
-        except Exception:
-            print('⚠  Not a valid audio/video file, skipping.')
+        except Exception as exc:
+            print(f"⚠  Could not decode '{os.path.basename(file)}', skipping.")
+            print(f"   Reason: {exc}")
 
     # ── Summary ──────────────────────────────────────────────────────
     print(f"\n{SEP}")