Revamp: embedded console, faster-whisper, simplified install
This commit is contained in:
@@ -1,11 +1,56 @@
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import site
|
||||
from glob import glob
|
||||
import whisper
|
||||
from torch import backends, cuda, Generator
|
||||
import colorama
|
||||
from colorama import Back,Fore
|
||||
colorama.init(autoreset=True)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CUDA setup — must happen before importing faster_whisper / ctranslate2
|
||||
# ---------------------------------------------------------------------------
|
||||
def _setup_cuda_dlls():
|
||||
"""Add NVIDIA pip-package DLL dirs to the DLL search path (Windows only).
|
||||
|
||||
pip-installed nvidia-cublas-cu12 / nvidia-cudnn-cu12 place their .dll
|
||||
files inside the site-packages tree. Python 3.8+ on Windows does NOT
|
||||
search PATH for DLLs loaded via ctypes/LoadLibrary, so we must
|
||||
explicitly register every nvidia/*/bin and nvidia/*/lib directory using
|
||||
os.add_dll_directory *and* prepend them to PATH (some native extensions
|
||||
still rely on PATH).
|
||||
"""
|
||||
if sys.platform != "win32":
|
||||
return
|
||||
try:
|
||||
for sp in site.getsitepackages():
|
||||
nvidia_root = os.path.join(sp, "nvidia")
|
||||
if not os.path.isdir(nvidia_root):
|
||||
continue
|
||||
for pkg in os.listdir(nvidia_root):
|
||||
for sub in ("bin", "lib"):
|
||||
d = os.path.join(nvidia_root, pkg, sub)
|
||||
if os.path.isdir(d):
|
||||
os.environ["PATH"] = d + os.pathsep + os.environ.get("PATH", "")
|
||||
try:
|
||||
os.add_dll_directory(d)
|
||||
except (OSError, AttributeError):
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_setup_cuda_dlls()
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
|
||||
def _detect_device():
|
||||
"""Return (device, compute_type) for the best available backend."""
|
||||
try:
|
||||
import ctranslate2
|
||||
cuda_types = ctranslate2.get_supported_compute_types("cuda")
|
||||
if "float16" in cuda_types:
|
||||
return "cuda", "float16"
|
||||
except Exception:
|
||||
pass
|
||||
return "cpu", "int8"
|
||||
|
||||
|
||||
# Get the path
|
||||
@@ -16,12 +61,12 @@ def get_path(path):
|
||||
# Main function
|
||||
def transcribe(path, glob_file, model=None, language=None, verbose=False):
    """
    Transcribes audio files in a specified folder using faster-whisper (CTranslate2).

    Args:
        path (str): Path to the folder containing the audio files.
        glob_file (list): List of audio file paths to transcribe.
        model (str, optional): Name of the Whisper model size to use for transcription.
            Defaults to None, which uses the default model.
        language (str, optional): Language code for transcription. Defaults to None,
            which enables automatic language detection.
        verbose (bool, optional): When True, print every decoded segment;
            otherwise show a compact progress line. Defaults to False.

    Returns:
        str: Summary message — either where the transcriptions were saved or
        a warning that no file could be transcribed.

    Notes:
        - The function downloads the specified model if not available locally.
        - The transcribed text files will be saved in a "transcriptions" folder
          within the specified path.
        - Uses CTranslate2 for up to 4x faster inference compared to openai-whisper.
        - FFmpeg is bundled via the PyAV dependency — no separate installation needed.
    """
    SEP = "─" * 46

    # ── Step 1: Detect hardware ──────────────────────────────────────
    device, compute_type = _detect_device()
    print(f"⚙ Device: {device} | Compute: {compute_type}")

    # ── Step 2: Load model ───────────────────────────────────────────
    print(f"⏳ Loading model '{model}' — downloading if needed...")
    whisper_model = WhisperModel(model, device=device, compute_type=compute_type)
    print("✅ Model ready!")
    print(SEP)

    # ── Step 3: Transcribe files ─────────────────────────────────────
    total_files = len(glob_file)
    print(f"📂 Found {total_files} item(s) in folder")
    print(SEP)

    files_transcripted = []
    for file_num, file in enumerate(glob_file, start=1):
        # Title = file name without its (first) extension.
        title = os.path.basename(file).split('.')[0]
        print(f"\n{'─' * 46}")
        print(f"📄 File {file_num}/{total_files}: {title}")
        try:
            # transcribe() is lazy: segments stream as they are decoded.
            segments, info = whisper_model.transcribe(
                file,
                language=language,
                beam_size=5
            )
            # Make output folder if missing.
            os.makedirs('{}/transcriptions'.format(path), exist_ok=True)
            segment_list = []
            with open("{}/transcriptions/{}.txt".format(path, title), 'w', encoding='utf-8') as f:
                f.write(title)
                for seg in segments:
                    start_ts = str(datetime.timedelta(seconds=seg.start))
                    end_ts = str(datetime.timedelta(seconds=seg.end))
                    f.write('\n[{} --> {}]:{}'.format(start_ts, end_ts, seg.text))
                    # Flush so partial output survives an interrupt.
                    f.flush()
                    if verbose:
                        print(" [%.2fs → %.2fs] %s" % (seg.start, seg.end, seg.text))
                    else:
                        print(" Transcribed up to %.0fs..." % seg.end, end='\r')
                    segment_list.append(seg)
            print(f"✅ Done — saved to transcriptions/{title}.txt")
            files_transcripted.append(segment_list)
        except Exception:
            # Non-audio files (or decode failures) are skipped, not fatal.
            print('⚠ Not a valid audio/video file, skipping.')

    # ── Summary ──────────────────────────────────────────────────────
    print(f"\n{SEP}")
    if len(files_transcripted) > 0:
        output_text = f"✅ Finished! {len(files_transcripted)} file(s) transcribed.\n Saved in: {path}/transcriptions"
    else:
        output_text = '⚠ No files eligible for transcription — try another folder.'
    print(output_text)
    print(SEP)
    return output_text
|
||||
|
||||
Reference in New Issue
Block a user