Compare commits

20 Commits

Author SHA1 Message Date
Kristofer Rolf Söderström ce8c365fc4 Update and rename Mac_2_instructions.txt to Mac_instructions.txt 2023-04-17 20:28:52 +02:00
Kristofer Rolf Söderström e2afd34170 Delete run_Mac_2.command 2023-04-17 20:25:18 +02:00
Kristofer Rolf Söderström 6fa49e41d9 Delete run_Mac_1.sh 2023-04-17 20:24:50 +02:00
Kristofer Söderström 1da9adbf5e updated version number 2023-04-14 10:32:38 +02:00
Kristofer Söderström 2769ddf68b dedicated windows and mac scripts, fixed verbose checkbox 2023-04-14 10:31:26 +02:00
Kristofer Rolf Söderström 1128e44486 Update README.md 2023-04-14 09:09:52 +02:00
Kristofer Rolf Söderström eec20b48c4 Update README.md 2023-04-14 08:30:29 +02:00
Kristofer Rolf Söderström b569d41aa9 Update README.md 2023-04-14 08:28:24 +02:00
Kristofer Rolf Söderström 99a6625e0e Update README.md 2023-03-31 11:12:06 +02:00
Kristofer Rolf Söderström b09114625a Update README.md 2023-03-27 21:29:51 +02:00
Kristofer Rolf Söderström 785f2b8215 Update README.md 2023-03-27 21:28:34 +02:00
Kristofer Rolf Söderström 412ab97157 Update README.md 2023-03-27 21:26:34 +02:00
Kristofer Rolf Söderström a14196b055 Update README.md 2023-03-27 21:25:41 +02:00
Kristofer Rolf Söderström c319316a4d Add files via upload 2023-03-27 21:25:11 +02:00
Kristofer Rolf Söderström 26c6f84e72 Update README.md 2023-03-27 21:24:12 +02:00
Kristofer Rolf Söderström 8f76466f57 typos 2023-03-27 21:18:04 +02:00
Kristofer Rolf Söderström 1f684a848a Update README.md 2023-03-27 10:08:19 +02:00
Kristofer Rolf Söderström bf75df30a4 Update README.md 2023-03-27 10:05:58 +02:00
Kristofer Söderström f5a8b19b65 fixed bug 2023-03-27 09:57:28 +02:00
Kristofer Söderström 7bbfef44cb added GUI and batch file to run GUI 2023-03-27 09:25:56 +02:00
9 changed files with 142 additions and 18 deletions
+1 -1
View File
@@ -5,7 +5,7 @@ authors:
given-names: "Kristofer Rolf"
orcid: "https://orcid.org/0000-0002-5322-3350"
title: "transcribe"
version: 1.0
version: 1.1.1
doi: 10.5281/zenodo.7760511
date-released: 2023-03-22
url: "https://github.com/soderstromkr/transcribe"
+100
View File
@@ -0,0 +1,100 @@
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from tkinter import messagebox
from transcribe import transcribe
from ttkthemes import ThemedTk
import whisper
import numpy as np
import glob, os
class App:
    """Tkinter front end for the ``transcribe`` function.

    The window contains text entries for the audio folder, the file type and
    the model name, a "Verbose" checkbox, and two buttons: one that starts the
    transcription and one that quits the application.
    """

    def __init__(self, master):
        """Build all widgets inside ``master``.

        Args:
            master: The root window that hosts the widgets.
        """
        self.master = master
        master.title("Local Transcribe")

        # Style options: same font and padding for every ttk widget class used.
        style = ttk.Style()
        for widget_class in ('TLabel', 'TEntry', 'TButton', 'TCheckbutton'):
            style.configure(widget_class, font=('Arial', 10), padding=10)

        # Folder Path (the only row with an extra "Browse" button).
        path_frame, self.path_entry = self._labeled_entry(
            "Folder Path:", 'sample_audio/')
        browse_button = ttk.Button(path_frame, text="Browse", command=self.browse)
        browse_button.pack(side=tk.LEFT, padx=5)

        # File Type
        _, self.file_type_entry = self._labeled_entry("File Type:", 'ogg')

        # Model
        _, self.model_entry = self._labeled_entry("Model:", 'small')

        # Language selection is currently disabled: transcribe() is always
        # called with language=None (auto-detect), so no entry is shown.

        # Verbose
        verbose_frame = ttk.Frame(master, padding=10)
        verbose_frame.pack(fill=tk.BOTH)
        self.verbose_var = tk.BooleanVar()
        verbose_checkbutton = ttk.Checkbutton(
            verbose_frame, text="Verbose", variable=self.verbose_var)
        verbose_checkbutton.pack(side=tk.LEFT, padx=5)

        # Buttons
        button_frame = ttk.Frame(master, padding=10)
        button_frame.pack(fill=tk.BOTH)
        transcribe_button = ttk.Button(
            button_frame, text="Transcribe Audio", command=self.transcribe)
        transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        quit_button = ttk.Button(button_frame, text="Quit", command=master.quit)
        quit_button.pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

    def _labeled_entry(self, label_text, default):
        """Create one "label + entry" row and return ``(frame, entry)``.

        The frame is returned so the caller can append further widgets to the
        same row (used for the "Browse" button).
        """
        frame = ttk.Frame(self.master, padding=10)
        frame.pack(fill=tk.BOTH)
        label = ttk.Label(frame, text=label_text)
        label.pack(side=tk.LEFT, padx=5)
        entry = ttk.Entry(frame, width=50)
        entry.insert(0, default)
        entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        return frame, entry

    def browse(self):
        """Let the user pick a folder and put it into the folder-path entry."""
        folder_path = filedialog.askdirectory()
        # Cancelling the dialog yields an empty value; keep the current path
        # instead of wiping it.
        if not folder_path:
            return
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    def transcribe(self):
        """Run ``transcribe`` with the values from the form and report the outcome.

        The call runs synchronously inside the button callback. On success the
        returned message is shown in an info dialog. On failure an error dialog
        is shown and the exception is re-raised so Tk still reports it.
        """
        path = self.path_entry.get()
        file_type = self.file_type_entry.get()
        model = self.model_entry.get()
        language = None  # set to auto-detect
        verbose = self.verbose_var.get()
        try:
            # Call the transcribe function with the appropriate arguments
            result = transcribe(
                path, file_type, model=model, language=language, verbose=verbose)
        except Exception as exc:
            # Without this the user only sees a traceback on the console
            # (if one is visible at all); surface the failure in the GUI too.
            messagebox.showerror("Transcription failed", str(exc))
            raise
        # Show the result in a message box
        messagebox.showinfo("Finished!", result)
if __name__ == "__main__":
    # Script entry point: create a themed root window ("clearlooks" theme),
    # attach the application widgets to it, and hand control to Tk's event loop.
    window = ThemedTk(theme="clearlooks")
    gui = App(window)
    window.mainloop()
+5
View File
@@ -0,0 +1,5 @@
### How to run on Mac
Unfortunately, I have not found a permanent solution for this; not being a Mac user has limited the ways I can test it. For now, these are the recommended steps for a beginner user:
1. Open a terminal and navigate to the root folder (transcribe-main if you downloaded the folder). You can also right-click (or equivalent) on the root folder to open a Terminal within the folder.
2. Run the following command:
```python GUI.py```
+16 -4
View File
@@ -1,7 +1,7 @@
## transcribe
Simple script that uses OpenAI's Whisper to transcribe audio files from your local folders.
## Note
This implementation and guide is mostly made for researchers not familiar with programming that want a way to transcribe their files locally, without internet connection, usually required within ethical data practices and frameworks. Two examples are shown, a normal workflow with interent connection. And one in which the model is loaded first, via openai-whisper, and then the transcription can be done without being connected to the internet.
This implementation and guide is mostly made for researchers not familiar with programming that want a way to transcribe their files locally, without internet connection, usually required within ethical data practices and frameworks. Two examples are shown, a normal workflow with internet connection. And one in which the model is loaded first, via openai-whisper, and then the transcription can be done without being connected to the internet. There is now also a GUI implementation, read below for more information.
### Instructions
#### Requirements
@@ -16,15 +16,27 @@ Users might not need to specifically install Transfomers. However, a conda insta
```
pip install -U openai-whisper
```
4. There is an option to run a batch file, which launches a GUI built on TKinter and TTKthemes. If using these options, make sure they are installed in your python build. You can install them via pip.
```
pip install tk
```
and
```
pip install ttkthemes
```
#### Using the script
This is a simple script with no installation. You can either clone the repository with
```
git clone https://github.com/soderstromkr/transcribe.git
```
and use the example.ipynb template to use the script **OR (for beginners)** download the ```transcribe.py``` file into your work folder. Then you can either import it to another script or notebook for use. I recommend jupyter notebook for new users, see the example below. (Remember to have transcribe.py and example.ipynb in the same working folder).
### Example
and use the example.ipynb template to use the script.
**OR** download the ```transcribe.py``` file into your work folder. Then you can either import it to another script or notebook for use. I recommend jupyter notebook for new users, see the example below. (Remember to have transcribe.py and example.ipynb in the same working folder).
#### Example with jupyter notebook
See [example](example.ipynb) for an implementation on jupyter notebook, also added an example for a simple [workaround](example_no_internet.ipynb) to transcribe while offline.
#### Using the GUI
You can also run the GUI version from your terminal by running ```python GUI.py```, or with the batch file called run_gui.bat; just make sure to add your conda path to it. If you want to download a model first and then go offline for transcription, I recommend running the model once with the default sample folder, which will download the model locally. The GUI should look like this:
![python GUI.py](gui_jpeg.jpg?raw=true)
[^1]: Advanced users can use ```pip install ffmpeg-python``` but be ready to deal with some [PATH issues](https://stackoverflow.com/questions/65836756/python-ffmpeg-wont-accept-path-why), which I encountered in Windows 11.
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

+5
View File
@@ -0,0 +1,5 @@
@echo off
echo Starting...
call conda activate venv
REM OPTION 2 : (KEEP TEXT WITHIN QUOTES AND CHANGE USERNAME) "C:/Users/user/Anaconda3/condabin/activate.bat"
call python GUI.py
@@ -1,3 +1,5 @@
Armstrong_Small_Step
In seconds:
[0.00 --> 24.00]: That's one small step for man, one giant leap for mankind.
[0.00 --> 7.00]: I'm going to step off the limb now.
[7.00 --> 18.00]: That's one small step for man.
[18.00 --> 24.00]: One giant leap for mankind.
@@ -1,3 +1,4 @@
Axel_Pettersson_röstinspelning
In seconds:
[0.00 --> 16.00]: Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
[0.00 --> 6.14]: Hej, jag heter Axel Pettersson. Jag följer bror 1976.
[6.40 --> 15.10]: Jag har varit vikerpedjan sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
+9 -10
View File
@@ -1,21 +1,20 @@
import whisper
import glob, os
def transcribe(path, file_type, model=None, language=None, verbose=True):
def transcribe(path, file_type, model=None, language=None, verbose=False):
'''Implementation of OpenAI's whisper model. Downloads model, transcribes audio files in a folder and returns the text files with transcriptions'''
try:
os.mkdir('{}transcriptions'.format(path))
os.mkdir('{}/transcriptions'.format(path))
except FileExistsError:
pass
glob_file = glob.glob(path+'/*{}'.format(file_type))
path = path
print('Using {} model, you can change this by specifying model="medium" for example'.format(model))
print('Only looking for file type {}, you can change this by specifying file_type="mp3"'.format(file_type))
print('Expecting {} language, you can change this by specifying language="English". None will try to auto-detect'.format(language))
print('Verbosity is {}. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False'.format(verbose))
print('Using {} model'.format(model))
print('File type is {}'.format(file_type))
print('Language is being detected automatically for each file')
print('Verbosity is set to {}'.format(verbose))
print('\nThere are {} {} files in path: {}\n\n'.format(len(glob_file), file_type, path))
print('Loading model...')
@@ -29,7 +28,7 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
result = model.transcribe(
file,
language=language,
verbose=True
verbose=verbose
)
start=[]
end=[]
@@ -39,7 +38,7 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
end.append(result['segments'][i]['end'])
text.append(result['segments'][i]['text'])
with open("{}transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
with open("{}/transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
file.write(title)
file.write('\nIn seconds:')
for i in range(len(result['segments'])):
@@ -47,4 +46,4 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
print('\nFinished file number {}.\n\n\n'.format(idx+1))
return 'Finished transcription, files can be found in {}transcriptions'.format(path)
return 'Finished transcription, files can be found in {}/transcriptions'.format(path)