Compare commits

18 Commits

Author SHA1 Message Date
Kristofer Söderström 1da9adbf5e updated version number 2023-04-14 10:32:38 +02:00
Kristofer Söderström 2769ddf68b dedicated windows and mac scripts, fixed verbose checkbox 2023-04-14 10:31:26 +02:00
Kristofer Rolf Söderström 1128e44486 Update README.md 2023-04-14 09:09:52 +02:00
Kristofer Rolf Söderström eec20b48c4 Update README.md 2023-04-14 08:30:29 +02:00
Kristofer Rolf Söderström b569d41aa9 Update README.md 2023-04-14 08:28:24 +02:00
Kristofer Rolf Söderström 99a6625e0e Update README.md 2023-03-31 11:12:06 +02:00
Kristofer Rolf Söderström b09114625a Update README.md 2023-03-27 21:29:51 +02:00
Kristofer Rolf Söderström 785f2b8215 Update README.md 2023-03-27 21:28:34 +02:00
Kristofer Rolf Söderström 412ab97157 Update README.md 2023-03-27 21:26:34 +02:00
Kristofer Rolf Söderström a14196b055 Update README.md 2023-03-27 21:25:41 +02:00
Kristofer Rolf Söderström c319316a4d Add files via upload 2023-03-27 21:25:11 +02:00
Kristofer Rolf Söderström 26c6f84e72 Update README.md 2023-03-27 21:24:12 +02:00
Kristofer Rolf Söderström 8f76466f57 typos 2023-03-27 21:18:04 +02:00
Kristofer Rolf Söderström 1f684a848a Update README.md 2023-03-27 10:08:19 +02:00
Kristofer Rolf Söderström bf75df30a4 Update README.md 2023-03-27 10:05:58 +02:00
Kristofer Söderström f5a8b19b65 fixed bug 2023-03-27 09:57:28 +02:00
Kristofer Söderström 7bbfef44cb added GUI and batch file to run GUI 2023-03-27 09:25:56 +02:00
Kristofer Söderström acadd17007 some corrections 2023-03-23 15:14:03 +01:00
13 changed files with 152 additions and 20 deletions
+1 -1
View File
@@ -5,7 +5,7 @@ authors:
given-names: "Kristofer Rolf"
orcid: "https://orcid.org/0000-0002-5322-3350"
title: "transcribe"
version: 1.0
version: 1.1.1
doi: 10.5281/zenodo.7760511
date-released: 2023-03-22
url: "https://github.com/soderstromkr/transcribe"
+100
View File
@@ -0,0 +1,100 @@
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from tkinter import messagebox
from transcribe import transcribe
from ttkthemes import ThemedTk
import whisper
import numpy as np
import glob, os
class App:
    """Tkinter front end for the ``transcribe`` function.

    The window holds text entries for the audio folder, the file type and
    the model name, a "Verbose" checkbox, and buttons to start the
    transcription or quit.
    """

    def __init__(self, master):
        """Create and lay out all widgets inside ``master`` (the root window)."""
        self.master = master
        master.title("Local Transcribe")

        # Style options: same font and padding for every ttk widget class used.
        style = ttk.Style()
        for widget_class in ('TLabel', 'TEntry', 'TButton', 'TCheckbutton'):
            style.configure(widget_class, font=('Arial', 10), padding=10)

        # Folder Path (the only row with an extra button next to the entry)
        path_frame, self.path_entry = self._add_labeled_entry("Folder Path:", 'sample_audio/')
        browse_button = ttk.Button(path_frame, text="Browse", command=self.browse)
        browse_button.pack(side=tk.LEFT, padx=5)

        # File Type
        _, self.file_type_entry = self._add_labeled_entry("File Type:", 'ogg')

        # Model
        _, self.model_entry = self._add_labeled_entry("Model:", 'small')

        # NOTE: a "Language" entry is currently disabled; transcribe() below
        # always passes language=None.

        # Verbose
        verbose_frame = ttk.Frame(master, padding=10)
        verbose_frame.pack(fill=tk.BOTH)
        self.verbose_var = tk.BooleanVar()
        verbose_checkbutton = ttk.Checkbutton(verbose_frame, text="Verbose", variable=self.verbose_var)
        verbose_checkbutton.pack(side=tk.LEFT, padx=5)

        # Buttons
        button_frame = ttk.Frame(master, padding=10)
        button_frame.pack(fill=tk.BOTH)
        transcribe_button = ttk.Button(button_frame, text="Transcribe Audio", command=self.transcribe)
        transcribe_button.pack(side=tk.LEFT, padx=5, pady=10, fill=tk.X, expand=True)
        quit_button = ttk.Button(button_frame, text="Quit", command=master.quit)
        quit_button.pack(side=tk.RIGHT, padx=5, pady=10, fill=tk.X, expand=True)

    def _add_labeled_entry(self, label_text, default):
        """Pack a new row (frame + label + entry) and return ``(frame, entry)``.

        ``default`` is the text the entry is pre-filled with.
        """
        frame = ttk.Frame(self.master, padding=10)
        frame.pack(fill=tk.BOTH)
        ttk.Label(frame, text=label_text).pack(side=tk.LEFT, padx=5)
        entry = ttk.Entry(frame, width=50)
        entry.insert(0, default)
        entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        return frame, entry

    def browse(self):
        """Let the user pick a folder and copy it into the path entry.

        The entry keeps its current text when the dialog is cancelled.
        """
        folder_path = filedialog.askdirectory()
        if not folder_path:
            # Cancelling the dialog yields an empty value; do not wipe the entry.
            return
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, folder_path)

    def transcribe(self):
        """Run ``transcribe`` with the values from the form and report the outcome.

        Shows a warning when no folder is given, an error dialog when the
        transcription raises, and an info dialog with the returned message
        on success.
        """
        path = self.path_entry.get().strip()
        file_type = self.file_type_entry.get().strip()
        model = self.model_entry.get().strip()
        if not path:
            messagebox.showwarning("Missing folder", "Please choose a folder that contains audio files.")
            return
        language = None  # set to auto-detect
        verbose = self.verbose_var.get()
        # Call the transcribe function with the appropriate arguments
        try:
            result = transcribe(path, file_type, model=model, language=language, verbose=verbose)
        except Exception as error:
            # Tell the GUI user what went wrong, then re-raise so the
            # traceback is still reported by Tkinter's callback handler.
            messagebox.showerror("Transcription failed", str(error))
            raise
        # Show the result in a message box
        messagebox.showinfo("Finished!", result)
if __name__ == "__main__":
    # ThemedTk is used in place of a plain tk.Tk() root so that the
    # "clearlooks" theme is applied to the window.
    window = ThemedTk(theme="clearlooks")
    app = App(window)
    # Hand control to the Tk event loop.
    window.mainloop()
+6
View File
@@ -0,0 +1,6 @@
### Steps to make command file executable
To make a file executable on a Mac, you need to open a terminal window in the directory where the file is located. Then run the following command:
chmod +x run_MAC_2.command
After running this command, the file should be marked as executable and you should be able to run it by double-clicking on it.
+16 -4
View File
@@ -1,7 +1,7 @@
## transcribe
Simple script that uses OpenAI's Whisper to transcribe audio files from your local folders.
## Note
This implementation and guide is mostly made for researchers not familiar with programming that want a way to transcribe their files locally, without internet connection, usually required within ethical data practices and frameworks. Two examples are shown, a normal workflow with interent connection. And one in which the model is loaded first, via openai-whisper, and then the transcription can be done without being connected to the internet.
This implementation and guide is mostly made for researchers not familiar with programming who want a way to transcribe their files locally, without an internet connection, as is usually required within ethical data practices and frameworks. Two examples are shown: a normal workflow with an internet connection, and one in which the model is loaded first, via openai-whisper, so that the transcription can then be done without being connected to the internet. There is now also a GUI implementation; read below for more information.
### Instructions
#### Requirements
@@ -16,15 +16,27 @@ Users might not need to specifically install Transfomers. However, a conda insta
```
pip install -U openai-whisper
```
4. There is an option to run a batch file, which launches a GUI built on Tkinter and ttkthemes. If you use this option, make sure both are installed in your Python environment. You can install them via pip.
```
pip install tk
```
and
```
pip install ttkthemes
```
#### Using the script
This is a simple script with no installation. You can either clone the repository with
```
git clone https://github.com/soderstromkr/transcribe.git
```
and use the example.ipynb template to use the script **OR (for beginners)** download the ```transcribe.py``` file into your work folder. Then you can either import it to another script or notebook for use. I recommend jupyter notebook for new users, see the example below. (Remember to have transcribe.py and example.ipynb in the same working folder).
and use the example.ipynb template to use the script.
**OR** download the ```transcribe.py``` file into your work folder. Then you can either import it to another script or notebook for use. I recommend jupyter notebook for new users, see the example below. (Remember to have transcribe.py and example.ipynb in the same working folder).
#### Example with jupyter notebook
See [example](example.ipynb) for an implementation on jupyter notebook, also added an example for a simple [workaround](example_no_internet.ipynb) to transcribe while offline.
#### Using the GUI
You can also run the GUI version from your terminal by running ```python GUI.py```, or with the batch file called run_gui.bat; just make sure to add your conda path to it. If you want to download a model first and then go offline for transcription, I recommend running the model once with the default sample folder, which will download the model locally. The GUI should look like this:
![python GUI.py](gui_jpeg.jpg?raw=true)
### Example
See the [example](example.ipynb) implementation on jupyter notebook.
[^1]: Advanced users can use ```pip install ffmpeg-python``` but be ready to deal with some [PATH issues](https://stackoverflow.com/questions/65836756/python-ffmpeg-wont-accept-path-why), which I encountered in Windows 11.
+1 -1
View File
@@ -40,7 +40,7 @@
"outputs": [],
"source": [
"path='sample_audio/'#folder path\n",
"file_type='ogg' #check your file for file type, will only transcribe files with the file type, 'ogg', 'WAV'\n",
"file_type='ogg' #check your file for file type, will only transcribe those files\n",
"model='medium' #'small', 'medium', 'large' (tradeoff between speed and accuracy)\n",
"language= None #tries to auto-detect, other options include 'English', 'Spanish', etc...\n",
"verbose = True # prints output while transcribing, False to deactivate"
+1 -1
View File
@@ -132,7 +132,7 @@
"outputs": [],
"source": [
"path='sample_audio/'#folder path\n",
"file_type='ogg' #check your file for file type, will only transcribe files with the file type, 'ogg', 'WAV'\n",
"file_type='ogg' #check your file for file type, will only transcribe those files\n",
"model='medium' #'small', 'medium', 'large' (tradeoff between speed and accuracy)\n",
"language= None #tries to auto-detect, other options include 'English', 'Spanish', etc...\n",
"verbose = True # prints output while transcribing, False to deactivate"
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

+4
View File
@@ -0,0 +1,4 @@
#!/bin/bash
# Launch the transcription GUI inside the "venv" conda environment.
echo Starting...
# Run from this script's own folder so the relative GUI.py path resolves
# no matter where the script was launched from (e.g. a double-click).
cd "$(dirname "$0")" || exit 1
# `conda activate` is a shell function that only exists after conda's shell
# hook has been loaded; a script shell does not load it by itself.
eval "$(conda shell.bash hook)"
conda activate venv
python -u GUI.py
+3
View File
@@ -0,0 +1,3 @@
#!/bin/bash
# Launch the transcription GUI with whichever python is on the PATH.
echo Running Script
# Run from this script's own folder so the relative GUI.py path resolves
# no matter where the script was launched from (e.g. a double-click).
cd "$(dirname "$0")" || exit 1
python -u GUI.py
+5
View File
@@ -0,0 +1,5 @@
@echo off
REM Launch the transcription GUI inside the "venv" conda environment.
echo Starting...
REM Run from this script's own folder so the relative GUI.py path resolves.
cd /d "%~dp0"
call conda activate venv
REM OPTION 2 : (KEEP TEXT WITHIN QUOTES AND CHANGE USERNAME) "C:/Users/user/Anaconda3/condabin/activate.bat"
call python GUI.py
@@ -1,3 +1,5 @@
Armstrong_Small_Step
In seconds:
[0.00 --> 24.00]: That's one small step for man, one giant leap for mankind.
[0.00 --> 7.00]: I'm going to step off the limb now.
[7.00 --> 18.00]: That's one small step for man.
[18.00 --> 24.00]: One giant leap for mankind.
@@ -1,3 +1,4 @@
Axel_Pettersson_röstinspelning
In seconds:
[0.00 --> 16.00]: Hej, jag heter Axel Pettersson, jag föddes i Örebro 1976. Jag har varit Wikipedia sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
[0.00 --> 6.14]: Hej, jag heter Axel Pettersson. Jag följer bror 1976.
[6.40 --> 15.10]: Jag har varit vikerpedjan sen 2008 och jag har översatt röstintroduktionsprojektet till svenska.
+9 -10
View File
@@ -1,21 +1,20 @@
import whisper
import glob, os
def transcribe(path, file_type, model=None, language=None, verbose=True):
def transcribe(path, file_type, model=None, language=None, verbose=False):
'''Implementation of OpenAI's whisper model. Downloads model, transcribes audio files in a folder and returns the text files with transcriptions'''
try:
os.mkdir('{}transcriptions'.format(path))
os.mkdir('{}/transcriptions'.format(path))
except FileExistsError:
pass
glob_file = glob.glob(path+'/*{}'.format(file_type))
path = path
print('Using {} model, you can change this by specifying model="medium" for example'.format(model))
print('Only looking for file type {}, you can change this by specifying file_type="mp3"'.format(file_type))
print('Expecting {} language, you can change this by specifying language="English". None will try to auto-detect'.format(language))
print('Verbosity is {}. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False'.format(verbose))
print('Using {} model'.format(model))
print('File type is {}'.format(file_type))
print('Language is being detected automatically for each file')
print('Verbosity is set to {}'.format(verbose))
print('\nThere are {} {} files in path: {}\n\n'.format(len(glob_file), file_type, path))
print('Loading model...')
@@ -29,7 +28,7 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
result = model.transcribe(
file,
language=language,
verbose=True
verbose=verbose
)
start=[]
end=[]
@@ -39,7 +38,7 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
end.append(result['segments'][i]['end'])
text.append(result['segments'][i]['text'])
with open("{}transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
with open("{}/transcriptions/{}.txt".format(path,title), 'w', encoding='utf-8') as file:
file.write(title)
file.write('\nIn seconds:')
for i in range(len(result['segments'])):
@@ -47,4 +46,4 @@ def transcribe(path, file_type, model=None, language=None, verbose=True):
print('\nFinished file number {}.\n\n\n'.format(idx+1))
return 'Finished transcription, files can be found in {}transcriptions'.format(path)
return 'Finished transcription, files can be found in {}/transcriptions'.format(path)