# Reconstructed from the patch "Create transcribe.py" (commit 55b82fd), whose
# contents had been flattened onto two lines.
import glob
import os


def _write_transcript(out_dir, title, segments):
    '''Write one transcript to ``<out_dir>/<title>.txt``, one line per timed segment.'''
    out_path = os.path.join(out_dir, '{}.txt'.format(title))
    with open(out_path, 'w', encoding='utf-8') as out:
        out.write(title)
        out.write('\nIn seconds:')
        out.writelines(
            '\n[{:.2f} --> {:.2f}]:{}'.format(seg['start'], seg['end'], seg['text'])
            for seg in segments
        )


def transcribe(path, file_type, model='base', language=None, verbose=True):
    '''Transcribe every matching audio file in a folder with OpenAI's whisper.

    For each file in ``path`` whose name ends with ``file_type``, a text file
    named after the audio file (extension removed) is written to the
    ``transcriptions`` sub-folder of ``path``. Each transcript holds the title
    followed by one ``[start --> end]:text`` line per segment, in seconds.

    Args:
        path: Folder containing the audio files, with or without a trailing
            path separator. It must already exist.
        file_type: File name suffix to look for, e.g. ``"mp3"``.
        model: Name of the whisper model handed to ``whisper.load_model``.
        language: Language passed on to whisper; ``None`` is announced to the
            user as auto-detection.
        verbose: Forwarded to whisper's ``transcribe`` call; when true the
            text is printed while it is being transcribed.

    Returns:
        A short message stating that transcription finished and naming ``path``.

    Raises:
        FileNotFoundError: If ``path`` does not exist, because the
            ``transcriptions`` folder cannot be created inside it.
    '''
    # Join path components properly so that a missing trailing separator no
    # longer sends the output to a sibling folder like "<path>transcriptions".
    out_dir = os.path.join(path, 'transcriptions')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        pass

    # Escape the folder so glob metacharacters in its name are taken literally;
    # sort so the file numbering is reproducible between runs.
    pattern = os.path.join(glob.escape(path), '*{}'.format(file_type))
    audio_files = sorted(glob.glob(pattern))

    print('Using {} model, you can change this by specifying model="medium" for example'.format(model))
    print('Only looking for file type {}, you can change this by specifying file_type="mp3"'.format(file_type))
    print('Expecting {} language, you can change this by specifying language="English". None will try to auto-detect'.format(language))
    print('Verbosity is {}. If TRUE it will print out the text as it is transcribed, you can turn this off by setting verbose=False'.format(verbose))
    print('\nThere are {} {} files in path: {}\n\n'.format(len(audio_files), file_type, path))

    finished = 'Finished transcription, files can be found in {}'.format(path)
    if not audio_files:
        # Nothing to do: skip loading (and possibly downloading) the model.
        return finished

    # Imported here so the dependency is only loaded when there is real work.
    import whisper

    print('Loading model...')
    loaded_model = whisper.load_model(model)

    for number, audio_file in enumerate(audio_files, start=1):
        # splitext keeps dots inside the name ("a.b.mp3" -> "a.b"), so two
        # different recordings cannot collapse onto the same transcript file.
        title = os.path.splitext(os.path.basename(audio_file))[0]

        print('Transcribing file number number {}: {}'.format(number, audio_file))
        print('Model and file loaded...\nStarting transcription...\n')
        result = loaded_model.transcribe(
            audio_file,
            language=language,
            verbose=verbose,  # honour the caller's choice instead of a fixed True
        )

        _write_transcript(out_dir, title, result['segments'])

        print('\nFinished file number {}.\n\n\n'.format(number))

    return finished