Update README.md

Add files via upload
Update README.md
2023-06-28 14:11:51 +02:00 · 2023-04-26 09:17:33 +02:00 · 2023-04-26 09:17:09 +02:00 · 2023-04-24 09:25:07 +02:00 · 2023-04-21 15:11:03 +02:00 · 2023-04-21 15:09:46 +02:00
11 changed files with 190 additions and 30 deletions
@@ -5,7 +5,7 @@ authors:
  given-names: "Kristofer Rolf"
  orcid: "https://orcid.org/0000-0002-5322-3350"
 title: "transcribe"
-version: 1.0
+version: 1.1.1
 doi: 10.5281/zenodo.7760511
 date-released: 2023-03-22
 url: "https://github.com/soderstromkr/transcribe"
@@ -0,0 +1,100 @@
 import tkinter as tk
 from tkinter import ttk
 from tkinter import filedialog
 from tkinter import messagebox
 from transcribe import transcribe
 from ttkthemes import ThemedTk
 import whisper 
 import numpy as np
 import glob, os
 class App:
    """Tkinter front end for the folder transcription function.

    The window offers entries for the audio folder, the file type and the
    Whisper model name, a "Verbose" checkbox, and buttons to start the
    transcription or quit.

    Attributes set by ``__init__`` and read by the callbacks:
        master: the root window the widgets are packed into.
        path_entry: entry holding the folder to scan.
        file_type_entry: entry holding the file suffix to look for.
        model_entry: entry holding the model name.
        verbose_var: ``tk.BooleanVar`` bound to the "Verbose" checkbox.
    """

    def __init__(self, master):
        """Build all widgets inside ``master`` and pre-fill the defaults."""
        self.master = master
        master.title("Local Transcribe")

        # Shared look for every ttk widget class used below.
        style = ttk.Style()
        for widget_style in ('TLabel', 'TEntry', 'TButton', 'TCheckbutton'):
            style.configure(widget_style, font=('Arial', 10), padding=10)

        # Folder path row, with a Browse button packed after the entry.
        path_frame, self.path_entry = self._add_labeled_entry(
            master, "Folder Path:", 'sample_audio/')
        browse_button = ttk.Button(path_frame, text="Browse", command=self.browse)
        browse_button.pack(side=tk.LEFT, padx=5)

        # File type and model rows.
        _, self.file_type_entry = self._add_labeled_entry(master, "File Type:", 'ogg')
        _, self.model_entry = self._add_labeled_entry(master, "Model:", 'small')

        # There is no language entry: the transcribe callback always passes
        # language=None, which is meant to trigger auto-detection.

        # Verbose checkbox.
        verbose_frame = ttk.Frame(master, padding=10)
        verbose_frame.pack(fill=tk.BOTH)
        self.verbose_var = tk.BooleanVar()
        verbose_checkbutton = ttk.Checkbutton(
            verbose_frame, text="Verbose", variable=self.verbose_var)
        verbose_checkbutton.pack(side=tk.LEFT, padx=5)

        # Action buttons.
        button_frame = ttk.Frame(master, padding=10)
        button_frame.pack(fill=tk.BOTH)
        transcribe_button = ttk.Button(
            button_frame, text="Transcribe Audio", command=self.transcribe)
        transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        quit_button = ttk.Button(button_frame, text="Quit", command=master.quit)
        quit_button.pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

    @staticmethod
    def _add_labeled_entry(master, label_text, default_text):
        """Pack a padded row holding a label and a pre-filled entry.

        Returns:
            A ``(frame, entry)`` tuple so callers can add more widgets to the
            row and keep a reference to the entry.
        """
        frame = ttk.Frame(master, padding=10)
        frame.pack(fill=tk.BOTH)
        ttk.Label(frame, text=label_text).pack(side=tk.LEFT, padx=5)
        entry = ttk.Entry(frame, width=50)
        entry.insert(0, default_text)
        entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        return frame, entry

    def browse(self):
        """Let the user pick a folder and put it in the folder path entry."""
        folder_path = filedialog.askdirectory()
        # askdirectory() returns an empty value when the dialog is cancelled;
        # keep the current path in that case instead of wiping it.
        if not folder_path:
            return
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    def transcribe(self):
        """Run the transcription with the values currently in the form.

        Shows the returned message in an info box on success. On failure an
        error box with the exception text is shown and the exception is
        re-raised, so the traceback still reaches Tk's callback error handler.
        """
        path = self.path_entry.get()
        file_type = self.file_type_entry.get()
        model = self.model_entry.get()
        language = None  # set to auto-detect
        verbose = self.verbose_var.get()
        try:
            # Inside a method this name resolves to the module-level
            # transcribe function, not to this method.
            result = transcribe(path, file_type, model=model, language=language, verbose=verbose)
        except Exception as error:
            messagebox.showerror("Transcription failed", str(error))
            raise
        # Show the result in a message box
        messagebox.showinfo("Finished!", result)
 if __name__ == "__main__":
    # Entry point when run as a script: open a ThemedTk window with the
    # "clearlooks" theme (used in place of a plain tk.Tk()), attach the
    # App widgets to it, and hand control to the Tk event loop.
    window = ThemedTk(theme="clearlooks")
    app = App(window)
    window.mainloop()
@@ -0,0 +1,5 @@
 ### How to run on Mac
 Unfortunately, I have not found a permanent solution for this; not being a Mac user has limited the ways I can test it. For now, these are the recommended steps for a beginner user:
 1. Open a terminal and navigate to the root folder (transcribe-main if you downloaded the folder). You can also right-click (or equivalent) on the root folder to open a Terminal within the folder.
 2. Run the following command: 
 python GUI.py
@@ -1,30 +1,71 @@
-## transcribe
+## Local Transcribe
-Simple script that uses OpenAI's Whisper to transcribe audio files from your local folders. 
+
 Local Transcribe uses OpenAI's Whisper to transcribe audio files from your local folders, creating text files on disk. 
 ## Note
-This implementation and guide is mostly made for researchers not familiar with programming that want a way to transcribe their files locally, without internet connection, usually required within ethical data practices and frameworks. Two examples are shown, a normal workflow with interent connection. And one in which the model is loaded first, via openai-whisper, and then the transcription can be done without being connected to the internet. 
+
 This implementation and guide is mostly made for researchers not familiar with programming that want a way to transcribe their files locally, without internet connection, usually required within ethical data practices and frameworks. Two examples are shown: a normal workflow with an internet connection, and one in which the model is loaded first, via openai-whisper, so that the transcription can then be done without being connected to the internet. There is now also a GUI implementation, read below for more information.  
 ### Instructions
 #### Requirements
-1. This script was made and tested in an Anaconda environment with python 3.10. I recommend this method if you're not familiar with python.
+
 1. This script was made and tested in an Anaconda environment with Python 3.10. I recommend this method if you're not familiar with Python.
 See [here](https://docs.anaconda.com/anaconda/install/index.html) for instructions. You might need administrator rights. 
 2. Whisper requires some additional libraries. The [setup](https://github.com/openai/whisper#setup) page states: "The codebase also depends on a few Python packages, most notably HuggingFace Transformers for their fast tokenizer implementation and ffmpeg-python for reading audio files."
-Users might not need to specifically install Transfomers. However, a conda installation might be needed for ffmepg[^1], which takes care of setting up PATH variables. From the anaconda prompt, type or copy the following:
+Users might not need to specifically install Transformers. However, a conda installation might be needed for ffmpeg[^1], which takes care of setting up PATH variables. From the anaconda prompt, type or copy the following:
 ```
 conda install -c conda-forge ffmpeg-python
 ```
 3. The main functionality comes from openai-whisper. See their [page](https://github.com/openai/whisper) for details. As of 2023-03-22 you can install via:
 ```
 pip install -U openai-whisper
 ```
 4. There is an option to run a batch file, which launches a GUI built on TKinter and TTKthemes. If using these options, make sure they are installed in your Python build. You can install them via pip.
 ```
 pip install tk
 ```
 and
 ```
 pip install ttkthemes 
 ```
 #### Using the script
-This is a simple script with no installation. You can either clone the repository with
+
 This is a simple script with no installation. You can download the zip folder and extract it to your preferred working folder.
 ![](Picture1.png)  
 Or by cloning the repository with:
 ```
 git clone https://github.com/soderstromkr/transcribe.git
 ```
 and use the example.ipynb template to use the script **OR (for beginners)** download the ```transcribe.py``` file into your work folder. Then you can either import it to another script or notebook for use. I recommend jupyter notebook for new users, see the example below. (Remember to have transcribe.py and example.ipynb in the same working folder).
-### Example
+
-See [example](example.ipynb) for an implementation on jupyter notebook, also added an example for a simple [workaround](example_no_internet.ipynb) to transcribe while offline. 
+#### Example with Jupyter Notebook
 See [example](example.ipynb) for an implementation on Jupyter Notebook, also added an example for a simple [workaround](example_no_internet.ipynb) to transcribe while offline.
 #### Using the GUI
 You can also run the GUI version from your terminal running ```python GUI.py``` or with the batch file called run_Windows.bat (for Windows users), just make sure to add your conda path to it. If you want to download a model first, and then go offline for transcription, I recommend running the model with the default sample folder, which will download the model locally. 
 The GUI should look like this:
 ![python GUI.py](gui_jpeg.jpg?raw=true)
 or this, on a Mac, by running `python GUI.py` or `python3 GUI.py`:
 ![python GUI Mac.py](gui-mac.png)
 [^1]: Advanced users can use ```pip install ffmpeg-python``` but be ready to deal with some [PATH issues](https://stackoverflow.com/questions/65836756/python-ffmpeg-wont-accept-path-why), which I encountered in Windows 11.
@@ -0,0 +1,5 @@
@echo off
 REM Launcher for the GUI: activates a conda environment, then runs GUI.py.
 echo Starting...
 REM OPTION 1 : activate the "base" environment through the conda command
 REM (presumably needs conda to be reachable from this shell - verify on your machine).
 call conda activate base
 REM OPTION 2 : (KEEP TEXT WITHIN QUOTES AND CHANGE USERNAME) "C:/Users/user/Anaconda3/condabin/activate.bat"
 REM GUI.py is given as a relative path, so run this file from the folder that contains it.
 call python GUI.py
@@ -1,3 +1,5 @@
 Armstrong_Small_Step
 In seconds:
-[0.00 --> 24.00]: That's one small step for man, one giant leap for mankind.
+[0.00 --> 7.00]: I'm going to step off the limb now.
 [7.00 --> 18.00]: That's one small step for man.
 [18.00 --> 24.00]: One giant leap for mankind.
@@ -1,3 +1,4 @@
 Axel_Pettersson_röstinspelning
 In seconds:
-[0.00 --> 16.00]: Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
+[0.00 --> 6.14]: Hej, jag heter Axel Pettersson. Jag följer bror 1976.
 [6.40 --> 15.10]: Jag har varit vikerpedjan sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
@@ -1,21 +1,27 @@
 import whisper 
 import glob, os
 #import torch #uncomment if using torch with cuda, below too
 import datetime
-def transcribe(path, file_type, model=None, language=None, verbose=True):
+def transcribe(path, file_type, model=None, language=None, verbose=False):
    '''Implementation of OpenAI's whisper model. Downloads model, transcribes audio files in a folder and returns the text files with transcriptions'''
    try:
-        os.mkdir('{}transcriptions'.format(path))
+        os.mkdir('{}/transcriptions'.format(path))
    except FileExistsError:
        pass
    glob_file = glob.glob(path+'/*{}'.format(file_type))
    path = path
-    print('Using {} model, you can change this by specifying model="medium" for example'.format(model))
+    #if torch.cuda.is_available():
-    print('Only looking for file type {}, you can change this by specifying file_type="mp3"'.format(file_type))    
+    #    generator = torch.Generator('cuda').manual_seed(42)
-    print('Expecting {} language, you can change this by specifying language="English". None will try to auto-detect'.format(language))
+    #else:
-    print('Verbosity is {}. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False'.format(verbose))
+    #    generator = torch.Generator().manual_seed(42)
    print('Using {} model'.format(model))
    print('File type is {}'.format(file_type))    
    print('Language is being detected automatically for each file')
    print('Verbosity is set to {}'.format(verbose))
    print('\nThere are {} {} files in path: {}\n\n'.format(len(glob_file), file_type, path))
    print('Loading model...')
@@ -29,22 +35,22 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
        result = model.transcribe(
            file, 
            language=language, 
-            verbose=True
+            verbose=verbose
        )
        start=[]
        end=[]
        text=[]
        for i in range(len(result['segments'])):
-            start.append(result['segments'][i]['start'])
+            start.append(str(datetime.timedelta(seconds=(result['segments'][i]['start']))))
-            end.append(result['segments'][i]['end'])
+            end.append(str(datetime.timedelta(seconds=(result['segments'][i]['end']))))
            text.append(result['segments'][i]['text'])
-        with open("{}transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
+        with open("{}/transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
            file.write(title)
            file.write('\nIn seconds:')
            for i in range(len(result['segments'])):
-                file.writelines('\n[{:.2f} --> {:.2f}]:{}'.format(start[i], end[i], text[i]))
+                file.writelines('\n[{} --> {}]:{}'.format(start[i], end[i], text[i]))
        print('\nFinished file number {}.\n\n\n'.format(idx+1))
-    return 'Finished transcription, files can be found in {}transcriptions'.format(path)    
+    return 'Finished transcription, files can be found in {}/transcriptions'.format(path)
Author	SHA1	Message	Date
Kristofer Rolf Söderström	b765ff6bc6	Update README.md	2023-06-28 14:11:51 +02:00
Kristofer Rolf Söderström	867b082589	Add files via upload	2023-04-26 09:17:33 +02:00
Kristofer Rolf Söderström	b4017c6fee	Update README.md	2023-04-26 09:17:09 +02:00
Kristofer Rolf Söderström	1ea5187e78	Merge pull request #1 from bjornekstrom/main README.md formatting suggestions	2023-04-24 09:25:07 +02:00
Björn Ekström	0051ceb873	Update README.md	2023-04-21 15:11:03 +02:00
Björn Ekström	76be00552f	Updated README and Mac screenshot	2023-04-21 15:09:46 +02:00
Björn Ekström	a5dd5d4a03	Update README.md Further formatting.	2023-04-21 14:23:14 +02:00
Björn Ekström	43bcffaf4c	Update README.md Some formatting suggestions.	2023-04-21 14:22:34 +02:00
Kristofer Rolf Söderström	4e1c709f43	Update transcribe.py better time keeping	2023-04-20 20:13:54 +02:00
Kristofer Rolf Söderström	dfe967bd58	Update run_Windows.bat	2023-04-20 19:35:51 +02:00
Kristofer Rolf Söderström	586289efe5	Update Mac_instructions.txt	2023-04-19 16:51:36 +02:00
Kristofer Rolf Söderström	c5a5597eee	Update README.md	2023-04-19 16:46:49 +02:00
Kristofer Rolf Söderström	ce8c365fc4	Update and rename Mac_2_instructions.txt to Mac_instructions.txt	2023-04-17 20:28:52 +02:00
Kristofer Rolf Söderström	e2afd34170	Delete run_Mac_2.command	2023-04-17 20:25:18 +02:00
Kristofer Rolf Söderström	6fa49e41d9	Delete run_Mac_1.sh	2023-04-17 20:24:50 +02:00
Kristofer Söderström	1da9adbf5e	updated version number	2023-04-14 10:32:38 +02:00
Kristofer Söderström	2769ddf68b	dedicated windows and mac scripts, fixed verbose checkbox	2023-04-14 10:31:26 +02:00
Kristofer Rolf Söderström	1128e44486	Update README.md	2023-04-14 09:09:52 +02:00
Kristofer Rolf Söderström	eec20b48c4	Update README.md	2023-04-14 08:30:29 +02:00
Kristofer Rolf Söderström	b569d41aa9	Update README.md	2023-04-14 08:28:24 +02:00
Kristofer Rolf Söderström	99a6625e0e	Update README.md	2023-03-31 11:12:06 +02:00
Kristofer Rolf Söderström	b09114625a	Update README.md	2023-03-27 21:29:51 +02:00
Kristofer Rolf Söderström	785f2b8215	Update README.md	2023-03-27 21:28:34 +02:00
Kristofer Rolf Söderström	412ab97157	Update README.md	2023-03-27 21:26:34 +02:00
Kristofer Rolf Söderström	a14196b055	Update README.md	2023-03-27 21:25:41 +02:00
Kristofer Rolf Söderström	c319316a4d	Add files via upload	2023-03-27 21:25:11 +02:00
Kristofer Rolf Söderström	26c6f84e72	Update README.md	2023-03-27 21:24:12 +02:00
Kristofer Rolf Söderström	8f76466f57	typos	2023-03-27 21:18:04 +02:00
Kristofer Rolf Söderström	1f684a848a	Update README.md	2023-03-27 10:08:19 +02:00
Kristofer Rolf Söderström	bf75df30a4	Update README.md	2023-03-27 10:05:58 +02:00
Kristofer Söderström	f5a8b19b65	fixed bug	2023-03-27 09:57:28 +02:00
Kristofer Söderström	7bbfef44cb	added GUI and batch file to run GUI	2023-03-27 09:25:56 +02:00