はじめに
以前にもReazonSpeechを試しています。touch-sp.hatenablog.com
touch-sp.hatenablog.com
今回で3回目です。
環境
Windows 11 CUDA 12.6 Python 3.13.3
PySide6でGUIを作っています。
torch==2.7.0+cu126 PySide6==6.9.0

環境構築
pip install torch==2.7.0+cu126 --index-url https://download.pytorch.org/whl/cu126
pip install pyside6
git clone https://github.com/reazon-research/ReazonSpeech
pip install ReazonSpeech/pkg/k2-asr
Pythonスクリプト
import sys
import os
from pathlib import Path
from datetime import datetime
from queue import Queue
from threading import Thread

from PySide6.QtWidgets import (
    QApplication,
    QMainWindow,
    QPushButton,
    QVBoxLayout,
    QWidget,
    QTextEdit,
    QLabel,
)
from PySide6.QtCore import QUrl, Signal, QObject, QTimer
from PySide6.QtMultimedia import (
    QMediaRecorder,
    QMediaCaptureSession,
    QAudioInput,
    QMediaFormat,
)

from reazonspeech.k2.asr import load_model, transcribe, audio_from_path

# The ASR model is loaded once at import time and shared by the worker thread.
model = load_model()

# Delay between the recorder reporting "stopped" and the file being queued.
# NOTE(review): presumably this gives the recorder time to finish writing the
# file to disk -- confirm whether a shorter delay is sufficient.
FINALIZE_DELAY_MS = 1000

# Sentinel meaning "no path was passed; fall back to self.temp_file".
_UNSET = object()


class TranscriptionSignals(QObject):
    """Signals emitted by the transcription worker and handled by the window."""

    transcription_done = Signal(str)
    error_occurred = Signal(str)


class AudioTranscriptionApp(QMainWindow):
    """Main window that records audio to a WAV file and transcribes it.

    Pressing the button toggles recording. When the recorder stops, the
    recorded file path is placed on a queue that a daemon worker thread
    consumes; the worker runs the ReazonSpeech model and reports the text
    (or an error message) back through ``TranscriptionSignals``.
    """

    def __init__(self):
        """Build the widgets, the recording pipeline and the worker thread."""
        super().__init__()
        self.setWindowTitle("音声録音と自動文字起こし")
        self.setGeometry(100, 100, 400, 300)

        layout = QVBoxLayout()

        self.record_button = QPushButton("録音開始")
        self.record_button.clicked.connect(self.toggle_recording)
        layout.addWidget(self.record_button)

        self.status_label = QLabel("待機中")
        layout.addWidget(self.status_label)

        self.text_edit = QTextEdit()
        layout.addWidget(self.text_edit)

        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)

        # Media components: audio input -> capture session -> recorder.
        self.audio_input = QAudioInput()
        self.media_recorder = QMediaRecorder()
        self.capture_session = QMediaCaptureSession()
        self.capture_session.setAudioInput(self.audio_input)
        self.capture_session.setRecorder(self.media_recorder)

        # Recording format: WAV container with the Wave codec.
        audio_settings = self.media_recorder.mediaFormat()
        audio_settings.setFileFormat(QMediaFormat.FileFormat.Wave)
        audio_settings.setAudioCodec(QMediaFormat.AudioCodec.Wave)
        self.media_recorder.setMediaFormat(audio_settings)

        # Quality setting.
        self.media_recorder.setQuality(QMediaRecorder.Quality.HighQuality)

        self.is_recording = False
        self.temp_file = None

        # Create the signals before the worker starts so the worker can never
        # observe a window without a ``signals`` attribute.
        self.signals = TranscriptionSignals()
        self.signals.transcription_done.connect(self.update_transcription)
        self.signals.error_occurred.connect(self.show_error)

        # Queue of recorded file paths and the worker thread that drains it.
        self.audio_queue = Queue()
        self.transcription_thread = Thread(
            target=self.transcribe_audio_thread, daemon=True
        )
        self.transcription_thread.start()

        # React to the recorder changing state (used to detect "stopped").
        self.media_recorder.recorderStateChanged.connect(
            self.on_recorder_state_changed
        )

    def toggle_recording(self):
        """Start recording if idle, otherwise stop the current recording."""
        if self.is_recording:
            self.stop_recording()
        else:
            self.start_recording()

    def start_recording(self):
        """Begin recording into a timestamp-named WAV file in the CWD."""
        current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.temp_file = Path(Path.cwd(), f"{current_time}.wav")
        self.media_recorder.setOutputLocation(
            QUrl.fromLocalFile(str(self.temp_file))
        )
        self.media_recorder.record()
        self.is_recording = True
        self.record_button.setText("録音停止")
        self.status_label.setText("録音中...")

    def stop_recording(self):
        """Ask the recorder to stop and reset the button for the next take."""
        self.media_recorder.stop()
        self.is_recording = False
        self.record_button.setText("録音開始")
        self.status_label.setText("録音停止中...")

    def on_recorder_state_changed(self, state):
        """Schedule processing of the finished file once the recorder stops.

        The path is captured here rather than when the timer fires: if the
        user starts a new recording during the delay, ``self.temp_file``
        already points at the new (still growing) file, and reading it later
        would queue the wrong file and drop the finished one.
        """
        if state != QMediaRecorder.RecorderState.StoppedState:
            return
        recorded_file = self.temp_file
        self.temp_file = None
        QTimer.singleShot(
            FINALIZE_DELAY_MS,
            lambda: self.process_recorded_file(recorded_file),
        )

    def process_recorded_file(self, recorded_file=_UNSET):
        """Queue a finished recording for transcription.

        Args:
            recorded_file: Path of the recording to queue. When omitted, the
                path currently stored in ``self.temp_file`` is used (and
                cleared on success), which is the original behavior.

        Emits ``error_occurred`` when there is no path or the file does not
        exist.
        """
        use_stored_path = recorded_file is _UNSET
        if use_stored_path:
            recorded_file = self.temp_file

        if not recorded_file or not os.path.exists(recorded_file):
            self.signals.error_occurred.emit("録音ファイルが見つかりません。")
            return

        # Do not overwrite the "recording" status of a take in progress.
        if not self.is_recording:
            self.status_label.setText("文字起こし中...")
        self.audio_queue.put(str(recorded_file))
        if use_stored_path:
            self.temp_file = None

    def transcribe_audio_thread(self):
        """Worker loop: transcribe each queued file and emit the result.

        Runs forever on a daemon thread. Any exception raised while loading
        or transcribing a file is reported through ``error_occurred`` so one
        bad file does not kill the worker.
        """
        while True:
            audio_file = self.audio_queue.get()
            try:
                audio = audio_from_path(audio_file)
                ret = transcribe(model, audio)
                self.signals.transcription_done.emit(ret.text)
            except Exception as e:
                self.signals.error_occurred.emit(
                    f"文字起こし中にエラーが発生しました: {str(e)}"
                )
            finally:
                self.audio_queue.task_done()

    def update_transcription(self, text):
        """Append transcribed text and restore the status label."""
        self.text_edit.append(text)
        # A new recording may already be running when the result arrives;
        # keep showing the recording status in that case.
        self.status_label.setText("録音中..." if self.is_recording else "待機中")

    def show_error(self, error_message):
        """Show an error in the status label and append it to the text box."""
        self.status_label.setText("エラー")
        self.text_edit.append(f"エラー: {error_message}")


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = AudioTranscriptionApp()
    window.show()
    sys.exit(app.exec())
このスクリプトはこちらとほとんど同じです。
touch-sp.hatenablog.com