Compare commits

33 Commits

Author SHA1 Message Date
Kristofer Söderström f8cf42733d Revamp: embedded console, faster-whisper, simplified install 2026-03-02 17:02:16 +01:00
Kristofer Rolf Söderström 7d3fe1ba26 Merge pull request #11 from soderstromkr/copilot/update-whisper-device-parameter
Pass explicit device parameter to whisper.load_model() for MPS acceleration
2026-01-22 14:03:13 +01:00
copilot-swe-agent[bot] da42a6e4cc Add .gitignore and remove __pycache__ files
Co-authored-by: soderstromkr <23003509+soderstromkr@users.noreply.github.com>
2026-01-22 13:00:38 +00:00
copilot-swe-agent[bot] 0dab0d9bea Add explicit device parameter to whisper.load_model()
Co-authored-by: soderstromkr <23003509+soderstromkr@users.noreply.github.com>
2026-01-22 13:00:21 +00:00
copilot-swe-agent[bot] 953c71ab28 Initial plan 2026-01-22 12:57:09 +00:00
Kristofer Rolf Söderström 5522bdd575 Merge pull request #6
Merged pull request #6
2026-01-22 13:53:23 +01:00
Kristofer Rolf Söderström 861c470330 Merge pull request #10 from soderstromkr/copilot/add-readme-gpu-support
Add GPU support documentation to README
2026-01-22 13:44:11 +01:00
copilot-swe-agent[bot] 6de6d4b2ff Add GPU support section to README with CUDA PyTorch installation instructions
Co-authored-by: soderstromkr <23003509+soderstromkr@users.noreply.github.com>
2026-01-22 12:42:09 +00:00
copilot-swe-agent[bot] 01552cc7cb Initial plan 2026-01-22 12:40:19 +00:00
Yaroslav P 049a168c81 amd graphic card support 2025-03-05 16:23:10 +02:00
Kristofer Rolf Söderström 56a925463f Update README.md 2024-05-17 08:51:16 +02:00
Kristofer Rolf Söderström fe60b04020 Update README.md 2024-05-17 08:49:28 +02:00
Kristofer Rolf Söderström ff06a257f2 Update README.md 2024-05-17 08:47:57 +02:00
Kristofer Rolf Söderström 5e31129ea2 Create requirements.txt 2024-05-17 08:44:39 +02:00
Kristofer Rolf Söderström 3f0bca02b7 Update README.md 2024-05-17 08:44:09 +02:00
Kristofer Rolf Söderström 488e78a5ae Update README.md 2024-05-17 08:42:42 +02:00
Kristofer Rolf Söderström 829a054300 Update README.md 2024-05-17 08:40:42 +02:00
Kristofer Rolf Söderström 462aae12ca Update README.md 2024-05-17 08:09:30 +02:00
Kristofer Rolf Söderström fec9190ba1 Update README.md 2024-05-17 08:08:51 +02:00
Kristofer Rolf Söderström 0dde25204d Update README.md
removed other installation options from readme
2024-05-17 08:07:00 +02:00
Kristofer Söderström b611aa6b8c removed messagebox 2023-11-06 10:13:04 +01:00
Kristofer Söderström 7d50d5f4cf QOL improvements 2023-11-06 09:57:44 +01:00
Kristofer Söderström 7799d03960 bug fixes 2023-11-06 09:31:53 +01:00
Kristofer Rolf Söderström f88186dacc Update app.py 2023-10-19 09:26:43 +02:00
Kristofer Rolf Söderström 3f5c1491ac Delete build.zip 2023-10-19 09:20:55 +02:00
Kristofer Rolf Söderström c83e15bdba Update README.md 2023-10-19 09:20:29 +02:00
Kristofer Rolf Söderström ff16ad30e1 Merge pull request #2 from ValentinFunk/patch-1
Fix mac instructions link
2023-10-19 09:09:01 +02:00
Valentin 622165b3e6 Update Mac_instructions.md 2023-09-08 10:11:02 +02:00
Valentin 0e9cbdca58 Fix mac instructions link 2023-09-08 10:09:15 +02:00
Kristofer Söderström 87cb509b14 added windows exe in as zip 2023-06-30 17:26:24 +02:00
Kristofer Söderström ba935cafb7 formatting 2023-06-30 16:32:37 +02:00
Kristofer Söderström 6497508b7a fix formatting 2023-06-30 16:23:07 +02:00
Kristofer Söderström d96333a5a7 Complete rework for GUI, experimental EXE file and other minor changes, see readme for more info 2023-06-30 16:11:59 +02:00
24 changed files with 739 additions and 515 deletions
+1
View File
@@ -0,0 +1 @@
*.zip filter=lfs diff=lfs merge=lfs -text
+26
View File
@@ -0,0 +1,26 @@
# Python cache
__pycache__/
*.py[cod]
*$py.class
# Virtual environments
venv/
env/
ENV/
.venv/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
# Build artifacts
dist/
build/
*.egg-info/
+3 -3
View File
@@ -4,8 +4,8 @@ authors:
- family-names: "Söderström"
given-names: "Kristofer Rolf"
orcid: "https://orcid.org/0000-0002-5322-3350"
title: "transcribe"
version: 1.1.1
doi: 10.5281/zenodo.7760511
title: "Local Transcribe"
version: 1.2
doi: 10.5281/zenodo.7760510
date-released: 2023-03-22
url: "https://github.com/soderstromkr/transcribe"
-100
View File
@@ -1,100 +0,0 @@
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from tkinter import messagebox
from transcribe import transcribe
from ttkthemes import ThemedTk
import whisper
import numpy as np
import glob, os
class App:
    """Main window of the legacy Local Transcribe GUI.

    Builds a themed ttk form (folder path, file type filter, model name,
    verbose flag) and wires two buttons: one that runs the blocking
    transcription and one that quits the application.
    """

    def __init__(self, master):
        # master: the root Tk/ThemedTk window that hosts all frames.
        self.master = master
        master.title("Local Transcribe")
        # Shared ttk style options so every widget uses the same font/padding.
        style = ttk.Style()
        style.configure('TLabel', font=('Arial', 10), padding=10)
        style.configure('TEntry', font=('Arial', 10), padding=10)
        style.configure('TButton', font=('Arial', 10), padding=10)
        style.configure('TCheckbutton', font=('Arial', 10), padding=10)
        # Folder Path row: label + entry (pre-filled with the sample folder) + Browse button.
        path_frame = ttk.Frame(master, padding=10)
        path_frame.pack(fill=tk.BOTH)
        path_label = ttk.Label(path_frame, text="Folder Path:")
        path_label.pack(side=tk.LEFT, padx=5)
        self.path_entry = ttk.Entry(path_frame, width=50)
        self.path_entry.insert(10, 'sample_audio/')
        self.path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        browse_button = ttk.Button(path_frame, text="Browse", command=self.browse)
        browse_button.pack(side=tk.LEFT, padx=5)
        # File Type row: extension filter (e.g. 'ogg') used when scanning the folder.
        file_type_frame = ttk.Frame(master, padding=10)
        file_type_frame.pack(fill=tk.BOTH)
        file_type_label = ttk.Label(file_type_frame, text="File Type:")
        file_type_label.pack(side=tk.LEFT, padx=5)
        self.file_type_entry = ttk.Entry(file_type_frame, width=50)
        self.file_type_entry.insert(10, 'ogg')
        self.file_type_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        # Model row: Whisper model name (defaults to 'small').
        model_frame = ttk.Frame(master, padding=10)
        model_frame.pack(fill=tk.BOTH)
        model_label = ttk.Label(model_frame, text="Model:")
        model_label.pack(side=tk.LEFT, padx=5)
        self.model_entry = ttk.Entry(model_frame, width=50)
        self.model_entry.insert(10, 'small')
        self.model_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        # Language (currently disabled)
        #language_frame = ttk.Frame(master, padding=10)
        #language_frame.pack(fill=tk.BOTH)
        #language_label = ttk.Label(language_frame, text="Language:")
        #language_label.pack(side=tk.LEFT, padx=5)
        #self.language_entry = ttk.Entry(language_frame, width=50)
        #self.language_entry.insert(10, np.nan)
        #self.language_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        # Verbose row: checkbox toggling detailed output during transcription.
        verbose_frame = ttk.Frame(master, padding=10)
        verbose_frame.pack(fill=tk.BOTH)
        self.verbose_var = tk.BooleanVar()
        verbose_checkbutton = ttk.Checkbutton(verbose_frame, text="Verbose", variable=self.verbose_var)
        verbose_checkbutton.pack(side=tk.LEFT, padx=5)
        # Buttons row: start transcription / quit the app.
        button_frame = ttk.Frame(master, padding=10)
        button_frame.pack(fill=tk.BOTH)
        transcribe_button = ttk.Button(button_frame, text="Transcribe Audio", command=self.transcribe)
        transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        quit_button = ttk.Button(button_frame, text="Quit", command=master.quit)
        quit_button.pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

    def browse(self):
        """Open a directory chooser and copy the selection into the path entry."""
        folder_path = filedialog.askdirectory()
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    def transcribe(self):
        """Read the form fields, run the (blocking) transcription, and show the result."""
        path = self.path_entry.get()
        file_type = self.file_type_entry.get()
        model = self.model_entry.get()
        #language = self.language_entry.get()
        language = None  # set to auto-detect
        verbose = self.verbose_var.get()
        # Call the transcribe function with the appropriate arguments
        result = transcribe(path, file_type, model=model, language=language, verbose=verbose)
        # Show the result in a message box
        tk.messagebox.showinfo("Finished!", result)
if __name__ == "__main__":
    # Launch the themed main window and hand control to the Tk event loop.
    main_window = ThemedTk(theme="clearlooks")
    gui = App(main_window)
    main_window.mainloop()
+31
View File
@@ -0,0 +1,31 @@
### How to run on Mac / Linux
#### Quick start
1. Open Terminal and navigate to the project folder (or right-click the folder and select "Open in Terminal").
2. Make the script executable (only needed once):
```
chmod +x run_Mac.sh
```
3. Run it:
```
./run_Mac.sh
```
This will automatically:
- Create a virtual environment (`.venv`)
- Install all dependencies (no admin rights needed)
- Launch the app
#### Manual steps (alternative)
If you prefer to do it manually:
```
python3 -m venv .venv
.venv/bin/python install.py
.venv/bin/python app.py
```
#### Notes
- **Python 3.10+** is required. macOS users can install it from [python.org](https://www.python.org/downloads/) or via `brew install python`.
- **No FFmpeg install needed** — audio decoding is bundled.
- **GPU acceleration** is not available on macOS (Apple Silicon MPS is not supported by CTranslate2). CPU with int8 quantization is still fast.
- On Apple Silicon (M1/M2/M3/M4), the `small` or `base` models run well. `medium` works but is slower.
-5
View File
@@ -1,5 +0,0 @@
### How to run on Mac
Unfortunately, I have not found a permanent solution for this; not being a Mac user has limited the ways I can test it. For now, these are the recommended steps for a beginner user:
1. Open a terminal and navigate to the root folder (transcribe-main if you downloaded the folder). You can also right-click (or equivalent) on the root folder to open a Terminal within the folder.
2. Run the following command:
python GUI.py
+69 -51
View File
@@ -1,72 +1,90 @@
## Local Transcribe
## Local Transcribe with Whisper
Local Transcribe uses OpenAI's Whisper to transcribe audio files from your local folders, creating text files on disk.
> **⚠ Note for Mac users (Apple Silicon):** This version uses `faster-whisper` (CTranslate2), which does **not** support Apple M-chip GPU acceleration. Transcription will run on CPU, which is slower than OpenAI's Whisper with Metal/CoreML support. The trade-off is a much simpler installation — no conda, no PyTorch, no admin rights. If you'd prefer M-chip GPU acceleration and don't mind a more involved setup, switch to the **classic** release:
> ```
> git checkout classic
> ```
## Note
Local Transcribe with Whisper is a user-friendly desktop application that allows you to transcribe audio and video files using the Whisper ASR system, powered by [faster-whisper](https://github.com/SYSTRAN/faster-whisper) (CTranslate2). This application provides a graphical user interface (GUI) built with Python and the Tkinter library, making it easy to use even for those not familiar with programming.
This implementation and guide are mostly made for researchers who are not familiar with programming and want a way to transcribe their files locally, without an internet connection — a requirement commonly found in ethical data practices and frameworks. Two examples are shown: a normal workflow with an internet connection, and one in which the model is loaded first, via openai-whisper, so that transcription can then be done without being connected to the internet. There is now also a GUI implementation; read below for more information.
## New in version 2.0!
1. **Switched to faster-whisper** — up to 4× faster transcription with lower memory usage.
2. **No separate FFmpeg installation needed** — audio decoding is handled by the bundled PyAV library.
3. **No admin rights required** — a plain `pip install` covers everything.
4. **No PyTorch dependency** — dramatically smaller install footprint.
5. **`tiny` model added** — smallest and fastest option for quick drafts.
### Instructions
#### Requirements
1. This script was made and tested in an Anaconda environment with Python 3.10. I recommend this method if you're not familiar with Python.
See [here](https://docs.anaconda.com/anaconda/install/index.html) for instructions. You might need administrator rights.
2. Whisper requires some additional libraries. The [setup](https://github.com/openai/whisper#setup) page states: "The codebase also depends on a few Python packages, most notably HuggingFace Transformers for their fast tokenizer implementation and ffmpeg-python for reading audio files."
Users might not need to specifically install Transformers. However, a conda installation might be needed for ffmpeg[^1], which takes care of setting up PATH variables. From the anaconda prompt, type or copy the following:
```
conda install -c conda-forge ffmpeg-python
```
3. The main functionality comes from openai-whisper. See their [page](https://github.com/openai/whisper) for details. As of 2023-03-22 you can install via:
```
pip install -U openai-whisper
```
4. There is an option to run a batch file, which launches a GUI built on TKinter and TTKthemes. If using these options, make sure they are installed in your Python build. You can install them via pip.
```
pip install tk
```
and
```
pip install ttkthemes
```
#### Using the script
This is a simple script with no installation. You can download the zip folder and extract it to your preferred working folder.
![](Picture1.png)
## Features
* Select the folder containing the audio or video files you want to transcribe. Tested with m4a video.
* Choose the language of the files you are transcribing. You can either select a specific language or let the application automatically detect the language.
* Select the Whisper model to use for the transcription. Available models include "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v2", and "large-v3". Models with .en ending are better if you're transcribing English, especially the base and small models.
* **Swedish-optimised models** — [KB-Whisper](https://huggingface.co/collections/KBLab/kb-whisper) from the National Library of Sweden (KBLab) is available in all sizes (tiny → large). These models reduce Word Error Rate by up to 47 % compared to OpenAI Whisper on Swedish speech. The language is set to Swedish automatically when a KB model is selected.
* Enable the verbose mode to receive detailed information during the transcription process.
* Monitor the progress of the transcription with the progress bar and terminal.
* Confirmation dialog before starting the transcription to ensure you have selected the correct folder.
* View the transcribed text in a message box once the transcription is completed.
## Installation
### Get the files
Download the zip folder and extract it to your preferred working folder.
![](images/Picture1.png)
Or by cloning the repository with:
```
git clone https://github.com/soderstromkr/transcribe.git
```
### Python Version **(any platform including Mac users)**
1. Install Python 3.10 or later. You can download it from [python.org](https://www.python.org/downloads/). During installation, **check "Add Python to PATH"**. No administrator rights are needed if you install for your user only.
2. Run the installer. Open a terminal (Command Prompt on Windows, Terminal on Mac/Linux) in the project folder and run:
```
python install.py
```
This will:
- Install all required packages (including bundled FFmpeg — no separate install needed)
- **Auto-detect your NVIDIA GPU** and ask if you want GPU acceleration
- No conda, no admin rights required
#### Example with Jupyter Notebook
Alternatively, you can install manually with `pip install -r requirements.txt`.
See [example](example.ipynb) for an implementation on Jupyter Notebook, also added an example for a simple [workaround](example_no_internet.ipynb) to transcribe while offline.
3. Run the app:
1. For **Windows**: double-click `run_Windows.bat` (it will auto-install on first run) or run:
```
python app.py
```
2. For **Mac / Linux**: run `./run_Mac.sh` (auto-installs on first run). See [Mac instructions](Mac_instructions.md) for details.
#### Using the GUI
**Note** The first run with a given model will download it (~75 MB for base, ~500 MB for medium). After that, everything works offline.
You can also run the GUI version from your terminal running ```python GUI.py``` or with the batch file called run_Windows.bat (for Windows users), just make sure to add your conda path to it. If you want to download a model first, and then go offline for transcription, I recommend running the model with the default sample folder, which will download the model locally.
## GPU Support
This program **does support running on NVIDIA GPUs**, which can significantly speed up transcription times. faster-whisper uses CTranslate2, which requires NVIDIA CUDA libraries for GPU acceleration.
The GUI should look like this:
### Automatic Detection
The `install.py` script **automatically detects NVIDIA GPUs** and will ask if you want to install GPU support. If you skipped it during installation, you can add it anytime:
```
pip install nvidia-cublas-cu12 nvidia-cudnn-cu12
```
![python GUI.py](gui_jpeg.jpg?raw=true)
**Note:** Make sure your NVIDIA GPU drivers are up to date. You can check by running `nvidia-smi` in your terminal. The program will automatically detect and use your GPU if available, otherwise it falls back to CPU.
or this, on a Mac, by running `python GUI.py` or `python3 GUI.py`:
### Verifying GPU Support
After installation, you can verify that your GPU is available by running:
```python
import ctranslate2
print(ctranslate2.get_supported_compute_types("cuda"))
```
If this returns a list containing `"float16"`, GPU acceleration is working.
![python GUI Mac.py](gui-mac.png)
## Usage
1. Launch the app — the built-in console panel at the bottom shows a welcome message and all progress updates.
2. Select the folder containing the audio or video files you want to transcribe by clicking the "Browse" button next to the "Folder" label. This will open a file dialog where you can navigate to the desired folder. Remember, you won't be choosing individual files but whole folders!
3. Enter the desired language for the transcription in the "Language" field. You can either select a language or leave it blank to enable automatic language detection.
4. Choose the Whisper model to use for the transcription from the dropdown list next to the "Model" label.
5. Click the "Transcribe" button to start the transcription. The button will be disabled during the process to prevent multiple transcriptions at once.
6. Monitor progress in the embedded console panel — it shows model loading, per-file progress, and segment timestamps in real time.
7. Once the transcription is completed, a message box will appear displaying the result. Click "OK" to close it.
8. You can run the application again or quit at any time by clicking the "Quit" button.
[^1]: Advanced users can use ```pip install ffmpeg-python``` but be ready to deal with some [PATH issues](https://stackoverflow.com/questions/65836756/python-ffmpeg-wont-accept-path-why), which I encountered in Windows 11.
## Jupyter Notebook
Don't want fancy EXEs or GUIs? Use the function as is. See [example](example.ipynb) for an implementation on Jupyter Notebook.
[![DOI](https://zenodo.org/badge/617404576.svg)](https://zenodo.org/badge/latestdoi/617404576)
+196
View File
@@ -0,0 +1,196 @@
import os
import sys
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from tkinter import messagebox
from src._LocalTranscribe import transcribe, get_path
import customtkinter
import threading
# ── Helper: redirect stdout/stderr into a CTkTextbox ──────────────────────
import re
_ANSI_RE = re.compile(r'\x1b\[[0-9;]*m') # strip colour codes
class _ConsoleRedirector:
"""Redirects output exclusively to the in-app console panel."""
def __init__(self, text_widget):
self.widget = text_widget
def write(self, text):
clean = _ANSI_RE.sub('', text) # strip ANSI colours
if clean.strip() == '':
return
# Schedule UI update on the main thread
try:
self.widget.after(0, self._append, clean)
except Exception:
pass
def _append(self, text):
self.widget.configure(state='normal')
self.widget.insert('end', text + ('\n' if not text.endswith('\n') else ''))
self.widget.see('end')
self.widget.configure(state='disabled')
def flush(self):
pass
# HuggingFace model IDs for non-standard models.
# Maps the display names shown in the model dropdown to the HuggingFace
# repository IDs (KBLab's Swedish-optimised Whisper models).
HF_MODEL_MAP = {
    'KB Swedish (tiny)': 'KBLab/kb-whisper-tiny',
    'KB Swedish (base)': 'KBLab/kb-whisper-base',
    'KB Swedish (small)': 'KBLab/kb-whisper-small',
    'KB Swedish (medium)': 'KBLab/kb-whisper-medium',
    'KB Swedish (large)': 'KBLab/kb-whisper-large',
}

# Global customtkinter appearance settings (applied before any widget is built).
customtkinter.set_appearance_mode("System")
customtkinter.set_default_color_theme("blue")  # Themes: blue (default), dark-blue, green
# Tracks whether the language entry still shows its placeholder text;
# cleared on the first click (see on_entry_click in App.__init__).
firstclick = True
class App:
    """Main window for Local Transcribe with Whisper.

    Builds the folder/language/model form plus an embedded console panel that
    captures stdout/stderr, and runs transcription on a worker thread so the
    Tk event loop stays responsive.
    """

    def __init__(self, master):
        # master: the CTk root window that hosts all frames.
        self.master = master
        # Fonts used across the form (labels vs. entry text).
        font = ('Roboto', 13, 'bold')  # Change the font and size here
        font_b = ('Roboto', 12)  # Change the font and size here
        # Folder Path row: label + entry (pre-filled with ./sample_audio) + Browse.
        path_frame = customtkinter.CTkFrame(master)
        path_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(path_frame, text="Folder:", font=font).pack(side=tk.LEFT, padx=5)
        self.path_entry = customtkinter.CTkEntry(path_frame, width=50, font=font_b)
        self.path_entry.insert(0, os.path.join(os.getcwd(), 'sample_audio'))
        self.path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        customtkinter.CTkButton(path_frame, text="Browse", command=self.browse, font=font).pack(side=tk.LEFT, padx=5)

        # Language frame
        # thanks to pommicket from Stackoverflow for this fix
        def on_entry_click(event):
            """Clear the placeholder text the first time the entry is clicked."""
            global firstclick
            if firstclick:  # if this is the first time they clicked it
                firstclick = False
                self.language_entry.delete(0, "end")  # delete all the text in the entry

        language_frame = customtkinter.CTkFrame(master)
        language_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(language_frame, text="Language:", font=font).pack(side=tk.LEFT, padx=5)
        self.language_entry = customtkinter.CTkEntry(language_frame, width=50, font=('Roboto', 12, 'italic'))
        self.default_language_text = "Enter language (or ignore to auto-detect)"
        self.language_entry.insert(0, self.default_language_text)
        self.language_entry.bind('<FocusIn>', on_entry_click)
        self.language_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Model frame: standard Whisper sizes, a visual separator row, then
        # the KBLab Swedish models (mapped to HF IDs via HF_MODEL_MAP).
        models = ['tiny', 'tiny.en', 'base', 'base.en',
                  'small', 'small.en', 'medium', 'medium.en',
                  'large-v2', 'large-v3',
                  '───────────────',
                  'KB Swedish (tiny)', 'KB Swedish (base)',
                  'KB Swedish (small)', 'KB Swedish (medium)',
                  'KB Swedish (large)']
        model_frame = customtkinter.CTkFrame(master)
        model_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        customtkinter.CTkLabel(model_frame, text="Model:", font=font).pack(side=tk.LEFT, padx=5)
        # ComboBox frame
        self.model_combobox = customtkinter.CTkComboBox(
            model_frame, width=50, state="readonly",
            values=models, font=font_b)
        self.model_combobox.set('medium')  # Set the default value
        self.model_combobox.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Progress Bar — created here, packed only while a job is running.
        self.progress_bar = ttk.Progressbar(master, length=200, mode='indeterminate')

        # Button actions frame
        button_frame = customtkinter.CTkFrame(master)
        button_frame.pack(fill=tk.BOTH, padx=10, pady=10)
        self.transcribe_button = customtkinter.CTkButton(button_frame, text="Transcribe", command=self.start_transcription, font=font)
        self.transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        customtkinter.CTkButton(button_frame, text="Quit", command=master.quit, font=font).pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

        # ── Embedded console / log panel ──────────────────────────────────
        log_label = customtkinter.CTkLabel(master, text="Console output", font=font, anchor='w')
        log_label.pack(fill=tk.X, padx=12, pady=(8, 0))
        self.log_box = customtkinter.CTkTextbox(master, height=220, font=('Consolas', 14),
                                                wrap='word', state='disabled',
                                                fg_color='#1e1e1e', text_color='#e0e0e0')
        self.log_box.pack(fill=tk.BOTH, expand=True, padx=10, pady=(2, 10))
        # Redirect stdout & stderr into the log panel (no backend console)
        sys.stdout = _ConsoleRedirector(self.log_box)
        sys.stderr = _ConsoleRedirector(self.log_box)
        # Welcome message (shown after redirect so it appears in the panel)
        print("Welcome to Local Transcribe with Whisper! \U0001f600")
        print("Transcriptions will be saved automatically.")
        # BUG FIX: was print("" * 46), which prints an empty line; a 46-char
        # horizontal rule was clearly intended.
        print("─" * 46)

    def browse(self):
        """Open a directory chooser (starting at the CWD) and fill the path entry."""
        initial_dir = os.getcwd()
        folder_path = filedialog.askdirectory(initialdir=initial_dir)
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    def start_transcription(self):
        """Disable the Transcribe button and run the job on a worker thread."""
        self.transcribe_button.configure(state=tk.DISABLED)
        # A separate thread keeps the Tk event loop (and console panel) responsive.
        threading.Thread(target=self.transcribe_thread).start()

    def transcribe_thread(self):
        """Worker: read the form values, run the transcription, report the result."""
        path = self.path_entry.get()
        model_display = self.model_combobox.get()
        # Reject the visual separator row in the dropdown.
        # BUG FIX: the original tested model_display.startswith(''), which is
        # True for EVERY string, so every selection was rejected; test for the
        # separator glyph instead.
        if model_display.startswith('─'):
            messagebox.showinfo("Invalid selection", "Please select a model, not the separator line.")
            self.transcribe_button.configure(state=tk.NORMAL)
            return
        # Map dropdown display names (e.g. 'KB Swedish (small)') to HF IDs;
        # plain Whisper sizes pass through unchanged.
        model = HF_MODEL_MAP.get(model_display, model_display)
        language = self.language_entry.get()
        # KB models are Swedish-only, so force the language.
        is_kb_model = model_display.startswith('KB Swedish')
        # Check if the language field has the default text or is empty
        if is_kb_model:
            language = 'sv'
        elif language == self.default_language_text or not language.strip():
            language = None  # Same as passing nothing: auto-detect.
        verbose = True  # always show transcription progress in the console panel
        # Show progress bar
        self.progress_bar.pack(fill=tk.X, padx=5, pady=5)
        self.progress_bar.start()
        # Setting path and files
        glob_file = get_path(path)
        # Start transcription. output_text stays None when the run fails, so we
        # don't rely on catching UnboundLocalError to detect failure below.
        output_text = None
        try:
            output_text = transcribe(path, glob_file, model, language, verbose)
        except UnboundLocalError:
            messagebox.showinfo("Files not found error!", 'Nothing found, choose another folder.')
        except ValueError:
            # BUG FIX: the original passed the whole text as the dialog *title*
            # with no message body; give the dialog both a title and a message.
            messagebox.showinfo(
                "Invalid language",
                "Invalid language name, you might have to clear the default text to continue!")
        # Hide progress bar
        self.progress_bar.stop()
        self.progress_bar.pack_forget()
        # Enable transcribe button
        self.transcribe_button.configure(state=tk.NORMAL)
        # Show the result only when the transcription produced one.
        if output_text is not None:
            messagebox.showinfo("Finished!", output_text)
if __name__ == "__main__":
    # Build the themed root window, size it to fit the console panel, and run.
    window = customtkinter.CTk()
    window.title("Local Transcribe with Whisper")
    # Taller than the classic layout to accommodate the embedded console.
    win_width, win_height = 550, 560
    window.geometry(f'{win_width}x{win_height}')
    window.minsize(450, 480)
    # Window icon
    window.iconbitmap('images/icon.ico')
    # Hand control to the Tk event loop.
    gui = App(window)
    window.mainloop()
+20
View File
@@ -0,0 +1,20 @@
# cx_Freeze build script: freezes app.py into a standalone executable.
from cx_Freeze import setup, Executable

# Packages cx_Freeze must bundle explicitly (not discovered automatically).
freeze_options = {
    "packages": ['faster_whisper', 'tkinter', 'customtkinter'],
}

# The single GUI entry point, shipped with the application icon.
app_executable = Executable("app.py", icon='images/icon.ico')

setup(
    name="Local Transcribe with Whisper",
    version="2.0",
    author="Kristofer Rolf Söderström",
    options={"build_exe": freeze_options},
    executables=[app_executable],
)
+61 -58
View File
@@ -1,123 +1,125 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "a2cd4050",
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"from transcribe import transcribe"
"# Local Transcribe with Whisper\n",
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "24e1d24e",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function transcribe in module transcribe:\n",
"Help on function transcribe in module src._LocalTranscribe:\n",
"\n",
"transcribe(path, file_type, model=None, language=None, verbose=True)\n",
" Implementation of OpenAI's whisper model. Downloads model, transcribes audio files in a folder and returns the text files with transcriptions\n",
"transcribe(path, glob_file, model=None, language=None, verbose=False)\n",
" Transcribes audio files in a specified folder using OpenAI's Whisper model.\n",
" \n",
" Args:\n",
" path (str): Path to the folder containing the audio files.\n",
" glob_file (list): List of audio file paths to transcribe.\n",
" model (str, optional): Name of the Whisper model to use for transcription.\n",
" Defaults to None, which uses the default model.\n",
" language (str, optional): Language code for transcription. Defaults to None,\n",
" which enables automatic language detection.\n",
" verbose (bool, optional): If True, enables verbose mode with detailed information\n",
" during the transcription process. Defaults to False.\n",
" \n",
" Returns:\n",
" str: A message indicating the result of the transcription process.\n",
" \n",
" Raises:\n",
" RuntimeError: If an invalid file is encountered, it will be skipped.\n",
" \n",
" Notes:\n",
" - The function downloads the specified model if not available locally.\n",
" - The transcribed text files will be saved in a \"transcriptions\" folder\n",
" within the specified path.\n",
"\n"
]
}
],
"source": [
"# Import the modules and get the docstring\n",
"from src._LocalTranscribe import transcribe, get_path\n",
"help(transcribe)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e52477fb",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path='sample_audio/'#folder path\n",
"file_type='ogg' #check your file for file type, will only transcribe those files\n",
"model='medium' #'small', 'medium', 'large' (tradeoff between speed and accuracy)\n",
"language= None #tries to auto-detect, other options include 'English', 'Spanish', etc...\n",
"verbose = True # prints output while transcribing, False to deactivate"
"# Set the variables\n",
"path='sample_audio/'# Folder path\n",
"model='small' # Model size\n",
"language= None # Preset language, None for automatic detection\n",
"verbose = True # Output transcription in realtime\n",
"\n",
"# Get glob file, additional step for app version.\n",
"\n",
"glob_file = get_path(path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d66866af",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using medium model, you can change this by specifying model=\"medium\" for example\n",
"Only looking for file type ogg, you can change this by specifying file_type=\"mp3\"\n",
"Expecting None language, you can change this by specifying language=\"English\". None will try to auto-detect\n",
"Verbosity is True. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False\n",
"\n",
"There are 2 ogg files in path: sample_audio/\n",
"\n",
"\n",
"Loading model...\n",
"Transcribing file number number 1: Armstrong_Small_Step\n",
"Model and file loaded...\n",
"Starting transcription...\n",
"\n",
"Trying to transcribe file named: Armstrong_Small_Step🕐\n",
"Detecting language using up to the first 30 seconds. Use `--language` to specify the language\n",
"Detected language: English\n",
"[00:00.000 --> 00:24.000] That's one small step for man, one giant leap for mankind.\n",
"\n",
"Finished file number 1.\n",
"\n",
"\n",
"\n",
"Transcribing file number number 2: Axel_Pettersson_röstinspelning\n",
"Model and file loaded...\n",
"Starting transcription...\n",
"[00:00.000 --> 00:07.000] I'm going to step off the limb now.\n",
"[00:07.000 --> 00:18.000] That's one small step for man.\n",
"[00:18.000 --> 00:24.000] One giant leap for mankind.\n",
"\n",
"Trying to transcribe file named: Axel_Pettersson_röstinspelning🕐\n",
"Detecting language using up to the first 30 seconds. Use `--language` to specify the language\n",
"Detected language: Swedish\n",
"[00:00.000 --> 00:16.000] Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.\n",
"[00:00.000 --> 00:06.140] Hej, jag heter Axel Pettersson. Jag följer bror 1976.\n",
"[00:06.400 --> 00:15.100] Jag har varit vikerpedjan sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.\n",
"\n",
"Finished file number 2.\n",
"Trying to transcribe file named: readme🕐\n",
"Not a valid file, skipping.\n",
"\n",
"\n",
"\n"
"Trying to transcribe file named: transcriptions🕐\n",
"Not a valid file, skipping.\n"
]
},
{
"data": {
"text/plain": [
"'Finished transcription, files can be found in sample_audio/transcriptions'"
"'Finished transcription, 2 files can be found in sample_audio//transcriptions'"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transcribe(path, file_type, model, language, verbose)"
"# Run the script\n",
"transcribe(path, glob_file, model, language, verbose)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bc67265",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "venv",
"language": "python",
"name": "python3"
},
@@ -132,8 +134,9 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 5
"nbformat_minor": 2
}
-231
View File
@@ -1,231 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "eba9e610",
"metadata": {},
"source": [
"A simple way to avoid being connected while transcribing is to first load the model version you want to use. See [here](https://github.com/openai/whisper/blob/main/README.md#available-models-and-languages) for more info."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "85cd2d12",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Whisper(\n",
" (encoder): AudioEncoder(\n",
" (conv1): Conv1d(80, 1024, kernel_size=(3,), stride=(1,), padding=(1,))\n",
" (conv2): Conv1d(1024, 1024, kernel_size=(3,), stride=(2,), padding=(1,))\n",
" (blocks): ModuleList(\n",
" (0-23): 24 x ResidualAttentionBlock(\n",
" (attn): MultiHeadAttention(\n",
" (query): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (key): Linear(in_features=1024, out_features=1024, bias=False)\n",
" (value): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (out): Linear(in_features=1024, out_features=1024, bias=True)\n",
" )\n",
" (attn_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" (mlp): Sequential(\n",
" (0): Linear(in_features=1024, out_features=4096, bias=True)\n",
" (1): GELU(approximate='none')\n",
" (2): Linear(in_features=4096, out_features=1024, bias=True)\n",
" )\n",
" (mlp_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
" )\n",
" (ln_post): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
" (decoder): TextDecoder(\n",
" (token_embedding): Embedding(51865, 1024)\n",
" (blocks): ModuleList(\n",
" (0-23): 24 x ResidualAttentionBlock(\n",
" (attn): MultiHeadAttention(\n",
" (query): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (key): Linear(in_features=1024, out_features=1024, bias=False)\n",
" (value): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (out): Linear(in_features=1024, out_features=1024, bias=True)\n",
" )\n",
" (attn_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" (cross_attn): MultiHeadAttention(\n",
" (query): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (key): Linear(in_features=1024, out_features=1024, bias=False)\n",
" (value): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (out): Linear(in_features=1024, out_features=1024, bias=True)\n",
" )\n",
" (cross_attn_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" (mlp): Sequential(\n",
" (0): Linear(in_features=1024, out_features=4096, bias=True)\n",
" (1): GELU(approximate='none')\n",
" (2): Linear(in_features=4096, out_features=1024, bias=True)\n",
" )\n",
" (mlp_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
" )\n",
" (ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
")"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import whisper\n",
"#change to model size, bigger is more accurate but slower\n",
"whisper.load_model(\"medium\") #base, small, medium, large"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0d2acd54",
"metadata": {},
"outputs": [],
"source": [
"#after it loads, you can disconnect from the internet and run the rest"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a2cd4050",
"metadata": {},
"outputs": [],
"source": [
"from transcribe import transcribe"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "24e1d24e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function transcribe in module transcribe:\n",
"\n",
"transcribe(path, file_type, model=None, language=None, verbose=True)\n",
" Implementation of OpenAI's whisper model. Downloads model, transcribes audio files in a folder and returns the text files with transcriptions\n",
"\n"
]
}
],
"source": [
"help(transcribe)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e52477fb",
"metadata": {},
"outputs": [],
"source": [
"path='sample_audio/'#folder path\n",
"file_type='ogg' #check your file for file type, will only transcribe those files\n",
"model='medium' #'small', 'medium', 'large' (tradeoff between speed and accuracy)\n",
"language= None #tries to auto-detect, other options include 'English', 'Spanish', etc...\n",
"verbose = True # prints output while transcribing, False to deactivate"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d66866af",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using medium model, you can change this by specifying model=\"medium\" for example\n",
"Only looking for file type ogg, you can change this by specifying file_type=\"mp3\"\n",
"Expecting None language, you can change this by specifying language=\"English\". None will try to auto-detect\n",
"Verbosity is True. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False\n",
"\n",
"There are 2 ogg files in path: sample_audio/\n",
"\n",
"\n",
"Loading model...\n",
"Transcribing file number number 1: Armstrong_Small_Step\n",
"Model and file loaded...\n",
"Starting transcription...\n",
"\n",
"Detecting language using up to the first 30 seconds. Use `--language` to specify the language\n",
"Detected language: English\n",
"[00:00.000 --> 00:24.000] That's one small step for man, one giant leap for mankind.\n",
"\n",
"Finished file number 1.\n",
"\n",
"\n",
"\n",
"Transcribing file number number 2: Axel_Pettersson_röstinspelning\n",
"Model and file loaded...\n",
"Starting transcription...\n",
"\n",
"Detecting language using up to the first 30 seconds. Use `--language` to specify the language\n",
"Detected language: Swedish\n",
"[00:00.000 --> 00:16.000] Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.\n",
"\n",
"Finished file number 2.\n",
"\n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"'Finished transcription, files can be found in sample_audio/transcriptions'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transcribe(path, file_type, model, language, verbose)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bc67265",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Before

Width:  |  Height:  |  Size: 135 KiB

After

Width:  |  Height:  |  Size: 135 KiB

View File

Before

Width:  |  Height:  |  Size: 324 KiB

After

Width:  |  Height:  |  Size: 324 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

+128
View File
@@ -0,0 +1,128 @@
"""
Installer script for Local Transcribe with Whisper.
Detects NVIDIA GPU and offers to install GPU acceleration support.
Usage:
python install.py
"""
import os
import subprocess
import sys
import shutil
import site
def detect_nvidia_gpu():
    """Return ``(found, name)`` for the first NVIDIA GPU visible to nvidia-smi.

    Probes ``nvidia-smi`` on PATH plus the usual Windows install locations.
    Any failure (missing binary, timeout, non-zero exit, empty output) is
    treated as "no GPU present".
    """
    probe_locations = (
        shutil.which("nvidia-smi"),
        r"C:\Windows\System32\nvidia-smi.exe",
        r"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe",
    )
    query_args = ["--query-gpu=name", "--format=csv,noheader"]
    for exe in probe_locations:
        if not exe or not os.path.isfile(exe):
            continue
        try:
            proc = subprocess.run(
                [exe] + query_args,
                capture_output=True, text=True, timeout=10,
            )
        except Exception:
            # Broken/hung driver tooling: try the next candidate.
            continue
        gpu_names = proc.stdout.strip()
        if proc.returncode == 0 and gpu_names:
            # Report only the first GPU when several are listed.
            return True, gpu_names.splitlines()[0]
    return False, None
def pip_install(*packages):
    """Run ``pip install`` for *packages* using the current interpreter.

    Echoes the full command line first; raises
    ``subprocess.CalledProcessError`` if pip exits non-zero.
    """
    command = [sys.executable, "-m", "pip", "install", *packages]
    print(f"\n> {' '.join(command)}\n")
    subprocess.check_call(command)
def get_site_packages():
    """Return the active site-packages directory.

    Prefers the first entry whose path literally ends in ``site-packages``;
    falls back to the first directory reported by the ``site`` module.
    """
    paths = site.getsitepackages()
    matches = [p for p in paths if p.endswith("site-packages")]
    return matches[0] if matches else paths[0]
def create_nvidia_pth():
    """Create a .pth startup hook that registers NVIDIA DLL directories.

    The ``site`` module executes any line of a ``.pth`` file that starts
    with ``import`` at interpreter startup, before user code runs.  The
    generated one-liner globs ``site-packages/nvidia/*/bin`` and
    ``.../lib`` and — on Windows only (``os.name == 'nt'``) — registers
    each existing directory via ``os.add_dll_directory`` and prepends it
    to ``PATH``, so ctranslate2 can locate the pip-installed CUDA DLLs.
    """
    sp = get_site_packages()
    pth_path = os.path.join(sp, "nvidia_cuda_path.pth")
    # This one-liner runs at Python startup, before any user code.
    # NOTE(review): the replace() below assumes the site-packages path
    # contains no single quotes — inside an r'...' literal, \' keeps the
    # backslash in the resulting string. TODO confirm on unusual paths.
    pth_content = (
        "import os, glob as g; "
        "any(os.add_dll_directory(d) or os.environ.__setitem__('PATH', d + os.pathsep + os.environ.get('PATH','')) "
        "for d in g.glob(os.path.join(r'" + sp.replace("'", "\\'") + "', 'nvidia', '*', 'bin')) "
        "+ g.glob(os.path.join(r'" + sp.replace("'", "\\'") + "', 'nvidia', '*', 'lib')) "
        "if os.path.isdir(d)) if os.name == 'nt' else None\n"
    )
    with open(pth_path, "w") as f:
        f.write(pth_content)
    print(f" Created CUDA startup hook: {pth_path}")
def verify_cuda():
    """Return True when ctranslate2 reports float16 CUDA support.

    The probe runs in a fresh subprocess so DLL-path changes made by the
    installer take effect; any error or timeout counts as failure.
    """
    probe = (
        "import ctranslate2; "
        "print('float16' in ctranslate2.get_supported_compute_types('cuda'))"
    )
    try:
        result = subprocess.run(
            [sys.executable, "-c", probe],
            capture_output=True, text=True, timeout=30,
        )
    except Exception:
        return False
    return result.stdout.strip() == "True"
def main():
    """Interactive installer: base requirements first, then optional GPU support."""
    banner = "=" * 55
    print(banner)
    print(" Local Transcribe with Whisper — Installer")
    print(banner)

    # Step 1: Base packages
    print("\n[1/2] Installing base requirements...")
    pip_install("-r", "requirements.txt")
    print("\n Base requirements installed!")

    # Step 2: GPU (only offered when nvidia-smi reports a device)
    print("\n[2/2] Checking for NVIDIA GPU...")
    gpu_found, gpu_name = detect_nvidia_gpu()
    if not gpu_found:
        print("\n No NVIDIA GPU detected — using CPU mode.")
    else:
        print(f"\n NVIDIA GPU detected: {gpu_name}")
        print(" GPU acceleration can make transcription 2-5x faster.")
        print(" This will install ~300 MB of additional CUDA libraries.\n")
        while True:
            choice = input(" Install GPU support? [Y/n]: ").strip().lower()
            if choice in ("", "y", "yes"):
                print("\n Installing CUDA libraries...")
                pip_install("nvidia-cublas-cu12", "nvidia-cudnn-cu12")
                create_nvidia_pth()
                print("\n Verifying CUDA...")
                if verify_cuda():
                    print(" GPU support verified and working!")
                else:
                    print(" WARNING: CUDA installed but not detected.")
                    print(" Update your NVIDIA drivers and try again.")
                break
            if choice in ("n", "no"):
                print("\n Skipping GPU. Re-run install.py to add it later.")
                break
            print(" Please enter Y or N.")

    print("\n" + banner)
    print(" Done! Run the app with: python app.py")
    print(banner)


if __name__ == "__main__":
    main()
+2
View File
@@ -0,0 +1,2 @@
faster-whisper
customtkinter
+29
View File
@@ -0,0 +1,29 @@
#!/bin/bash
# ============================================================
# Local Transcribe with Whisper — macOS / Linux launcher
# ============================================================
# Double-click this file or run: ./run_Mac.sh
# On first run it creates a venv and installs dependencies.
# ============================================================
set -e

# Always run from the directory this script lives in.
cd "$(dirname "$0")"

VENV_PY=".venv/bin/python"

# Bootstrap the virtual environment on first launch.
if [ ! -f "$VENV_PY" ]; then
    echo "Creating virtual environment..."
    python3 -m venv .venv
fi

# Run the installer when faster_whisper is not importable yet.
if ! "$VENV_PY" -c "import faster_whisper" 2>/dev/null; then
    echo "First run detected — running installer..."
    "$VENV_PY" install.py
    echo
fi

echo "Starting Local Transcribe..."
"$VENV_PY" app.py
+22 -4
View File
@@ -1,5 +1,23 @@
@echo off
echo Starting...
call conda activate base
REM OPTION 2 : (KEEP TEXT WITHIN QUOTES AND CHANGE USERNAME) "C:/Users/user/Anaconda3/condabin/activate.bat"
call python GUI.py
REM Create .venv on first run if it doesn't exist
if not exist ".venv\Scripts\python.exe" (
    echo Creating virtual environment...
    python -m venv .venv
    REM "if errorlevel 1" is true for any exit code >= 1, i.e. venv creation failed
    if errorlevel 1 (
        echo ERROR: Failed to create virtual environment. Is Python installed and on PATH?
        pause
        exit /b 1
    )
)
REM All later commands go through the venv's interpreter
set PYTHON=.venv\Scripts\python.exe
REM Check if dependencies are installed
REM Probe for faster_whisper; 2>nul hides the ImportError traceback
%PYTHON% -c "import faster_whisper" 2>nul
if errorlevel 1 (
    echo First run detected - running installer...
    %PYTHON% install.py
    echo.
)
echo Starting Local Transcribe...
%PYTHON% app.py
@@ -1,5 +1,2 @@
Armstrong_Small_Step
In seconds:
[0.00 --> 7.00]: I'm going to step off the limb now.
[7.00 --> 18.00]: That's one small step for man.
[18.00 --> 24.00]: One giant leap for mankind.
[0:00:00 --> 0:00:07]: That's one small step for man, one giant leap for mankind.
@@ -1,4 +1,2 @@
Axel_Pettersson_röstinspelning
In seconds:
[0.00 --> 6.14]: Hej, jag heter Axel Pettersson. Jag följer bror 1976.
[6.40 --> 15.10]: Jag har varit vikerpedjan sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
[0:00:00 --> 0:00:15]: Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
+149
View File
@@ -0,0 +1,149 @@
import os
import sys
import datetime
import site
from glob import glob
# ---------------------------------------------------------------------------
# CUDA setup — must happen before importing faster_whisper / ctranslate2
# ---------------------------------------------------------------------------
def _setup_cuda_dlls():
"""Add NVIDIA pip-package DLL dirs to the DLL search path (Windows only).
pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their .dll
files inside the site-packages tree. Python 3.8+ on Windows does NOT
search PATH for DLLs loaded via ctypes/LoadLibrary, so we must
explicitly register every nvidia/*/bin and nvidia/*/lib directory using
os.add_dll_directory *and* prepend them to PATH (some native extensions
still rely on PATH).
"""
if sys.platform != "win32":
return
try:
for sp in site.getsitepackages():
nvidia_root = os.path.join(sp, "nvidia")
if not os.path.isdir(nvidia_root):
continue
for pkg in os.listdir(nvidia_root):
for sub in ("bin", "lib"):
d = os.path.join(nvidia_root, pkg, sub)
if os.path.isdir(d):
os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "")
try:
os.add_dll_directory(d)
except (OSError, AttributeError):
pass
except Exception:
pass
_setup_cuda_dlls()
from faster_whisper import WhisperModel
def _detect_device():
"""Return (device, compute_type) for the best available backend."""
try:
import ctranslate2
cuda_types = ctranslate2.get_supported_compute_types("cuda")
if "float16" in cuda_types:
return "cuda", "float16"
except Exception:
pass
return "cpu", "int8"
# Folder listing helper
def get_path(path):
    """Return every entry (files and subfolders) directly inside *path*."""
    return glob(path + '/*')
# Main function
def transcribe(path, glob_file, model=None, language=None, verbose=False):
    """
    Transcribes audio files in a specified folder using faster-whisper (CTranslate2).

    Args:
        path (str): Path to the folder containing the audio files.
        glob_file (list): List of audio file paths to transcribe.
        model (str, optional): Name of the Whisper model size to use for transcription.
            Defaults to None, which uses the default model.
        language (str, optional): Language code for transcription. Defaults to None,
            which enables automatic language detection.
        verbose (bool, optional): If True, prints each segment as it is decoded;
            otherwise shows a one-line progress counter. Defaults to False.

    Returns:
        str: A message indicating the result of the transcription process.

    Notes:
        - The function downloads the specified model if not available locally.
        - The transcribed text files will be saved in a "transcriptions" folder
          within the specified path.
        - Invalid (non-audio) entries are skipped rather than raising.
        - Uses CTranslate2 for up to 4x faster inference compared to openai-whisper.
        - FFmpeg is bundled via the PyAV dependency — no separate installation needed.
    """
    # Fix: the previous `"" * 46` evaluated to an empty string, so the
    # console section separators were invisible.
    SEP = "─" * 46
    # os.path.join avoids the "sample_audio//transcriptions" double slash
    # produced by string formatting when `path` ends with a separator.
    out_dir = os.path.join(path, "transcriptions")

    # ── Step 1: Detect hardware ──────────────────────────────────────
    device, compute_type = _detect_device()
    print(f"⚙ Device: {device} | Compute: {compute_type}")

    # ── Step 2: Load model ───────────────────────────────────────────
    print(f"⏳ Loading model '{model}' — downloading if needed...")
    whisper_model = WhisperModel(model, device=device, compute_type=compute_type)
    print("✅ Model ready!")
    print(SEP)

    # ── Step 3: Transcribe files ─────────────────────────────────────
    total_files = len(glob_file)
    print(f"📂 Found {total_files} item(s) in folder")
    print(SEP)

    files_transcripted = []
    for file_num, file in enumerate(glob_file, start=1):
        # splitext keeps interior dots ("my.talk.ogg" -> "my.talk"),
        # unlike split('.')[0] which truncated such names.
        title = os.path.splitext(os.path.basename(file))[0]
        print(f"\n{SEP}")
        print(f"📄 File {file_num}/{total_files}: {title}")
        try:
            segments, _info = whisper_model.transcribe(
                file,
                language=language,
                beam_size=5,
            )
            # Make the output folder if missing
            os.makedirs(out_dir, exist_ok=True)
            # Stream segments to disk as they are decoded so partial
            # results survive an interruption.
            segment_list = []
            with open(os.path.join(out_dir, f"{title}.txt"), "w", encoding="utf-8") as f:
                f.write(title)
                for seg in segments:
                    start_ts = str(datetime.timedelta(seconds=seg.start))
                    end_ts = str(datetime.timedelta(seconds=seg.end))
                    f.write('\n[{} --> {}]:{}'.format(start_ts, end_ts, seg.text))
                    f.flush()
                    if verbose:
                        print(" [%.2fs → %.2fs] %s" % (seg.start, seg.end, seg.text))
                    else:
                        print(" Transcribed up to %.0fs..." % seg.end, end='\r')
                    segment_list.append(seg)
            print(f"✅ Done — saved to transcriptions/{title}.txt")
            files_transcripted.append(segment_list)
        except Exception:
            # Deliberate best-effort: folders and non-audio files are
            # expected in the input directory and simply skipped.
            print('⚠ Not a valid audio/video file, skipping.')

    # ── Summary ──────────────────────────────────────────────────────
    print(f"\n{SEP}")
    if files_transcripted:
        output_text = (
            f"✅ Finished! {len(files_transcripted)} file(s) transcribed.\n"
            f" Saved in: {out_dir}"
        )
    else:
        output_text = '⚠ No files eligible for transcription — try another folder.'
    print(output_text)
    print(SEP)
    return output_text
-56
View File
@@ -1,56 +0,0 @@
import whisper
import glob, os
#import torch #uncomment if using torch with cuda, below too
import datetime
def transcribe(path, file_type, model=None, language=None, verbose=False):
    """Transcribe every *file_type* audio file in *path* with OpenAI's whisper.

    Downloads the requested model if needed, writes one
    ``<path>/transcriptions/<title>.txt`` per audio file containing
    timestamped segments, and returns a summary message.

    Args:
        path (str): Folder containing the audio files.
        file_type (str): File extension to look for (e.g. 'ogg', 'mp3').
        model (str, optional): Whisper model size ('base', 'small', 'medium', 'large').
        language (str, optional): Language hint; None auto-detects per file.
        verbose (bool, optional): Forwarded to whisper; prints text while transcribing.

    Returns:
        str: Completion message naming the output folder.
    """
    # exist_ok replaces the previous try/except FileExistsError dance.
    os.makedirs('{}/transcriptions'.format(path), exist_ok=True)
    glob_file = glob.glob(path + '/*{}'.format(file_type))
    print('Using {} model'.format(model))
    print('File type is {}'.format(file_type))
    print('Language is being detected automatically for each file')
    print('Verbosity is set to {}'.format(verbose))
    print('\nThere are {} {} files in path: {}\n\n'.format(len(glob_file), file_type, path))
    print('Loading model...')
    # Keep the loaded model in its own name instead of rebinding `model`.
    whisper_model = whisper.load_model(model)
    for idx, audio_path in enumerate(glob_file):
        title = os.path.basename(audio_path).split('.')[0]
        # Fixed duplicated word in the progress message ("number number").
        print('Transcribing file number {}: {}'.format(idx + 1, title))
        print('Model and file loaded...\nStarting transcription...\n')
        result = whisper_model.transcribe(
            audio_path,
            language=language,
            verbose=verbose
        )
        # `out_file` no longer shadows the loop variable as the old
        # `with open(...) as file:` did.
        with open("{}/transcriptions/{}.txt".format(path, title), 'w', encoding='utf-8') as out_file:
            out_file.write(title)
            out_file.write('\nIn seconds:')
            for segment in result['segments']:
                start = str(datetime.timedelta(seconds=segment['start']))
                end = str(datetime.timedelta(seconds=segment['end']))
                out_file.writelines('\n[{} --> {}]:{}'.format(start, end, segment['text']))
        print('\nFinished file number {}.\n\n\n'.format(idx + 1))
    return 'Finished transcription, files can be found in {}/transcriptions'.format(path)