KyosukeIchikawa committed on
Commit
3a494fe
·
1 Parent(s): 0d901a7

style: Complete remaining SIM108 improvements and ruff formatting

Browse files

- Apply final ternary operator simplification in content_extractor.py
- Include ruff formatter automatic improvements to code style
- Now only 3 ruff errors remain (2 SIM117, 1 SIM108)

These are all minor style improvements that can be addressed if desired.

.pre-commit-hooks/run_staged_tests.py CHANGED
@@ -11,9 +11,7 @@ import time
11
  from typing import List, Set
12
 
13
  # ロギング設定
14
- logging.basicConfig(
15
- level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
16
- )
17
  logger = logging.getLogger("run_staged_tests")
18
 
19
 
@@ -70,9 +68,7 @@ def get_test_files_to_run(staged_files: List[str]) -> Set[str]:
70
  check=True,
71
  )
72
  for test_file in matching_tests.stdout.strip().split("\n"):
73
- if (
74
- test_file and "test_audio_generator.py" not in test_file
75
- ): # Skip empty lines and problematic test
76
  test_files.add(test_file)
77
  except subprocess.CalledProcessError:
78
  pass
@@ -95,11 +91,7 @@ def run_pytest(test_files: Set[str]) -> bool:
95
  venv_pytest = "venv/bin/python -m pytest"
96
 
97
  # Use venv pytest if available, otherwise try system pytest
98
- if os.path.exists("venv/bin/python"):
99
- # タイムアウト(秒)を指定して実行
100
- cmd = f"{venv_pytest} {' '.join(test_files)} -v --timeout=30"
101
- else:
102
- cmd = f"python -m pytest {' '.join(test_files)} -v --timeout=30"
103
 
104
  logger.info(f"Running: {cmd}")
105
 
 
11
  from typing import List, Set
12
 
13
  # ロギング設定
14
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
 
15
  logger = logging.getLogger("run_staged_tests")
16
 
17
 
 
68
  check=True,
69
  )
70
  for test_file in matching_tests.stdout.strip().split("\n"):
71
+ if test_file and "test_audio_generator.py" not in test_file: # Skip empty lines and problematic test
 
 
72
  test_files.add(test_file)
73
  except subprocess.CalledProcessError:
74
  pass
 
91
  venv_pytest = "venv/bin/python -m pytest"
92
 
93
  # Use venv pytest if available, otherwise try system pytest
94
+ cmd = f"{venv_pytest} {' '.join(test_files)} -v --timeout=30" if os.path.exists("venv/bin/python") else f"python -m pytest {' '.join(test_files)} -v --timeout=30"
 
 
 
 
95
 
96
  logger.info(f"Running: {cmd}")
97
 
tests/e2e/conftest.py CHANGED
@@ -94,10 +94,7 @@ def browser():
94
  Browser: Playwrightブラウザインスタンス
95
  """
96
  with sync_playwright() as playwright:
97
- if os.environ.get("HEADLESS", "true").lower() == "true":
98
- browser = playwright.chromium.launch(headless=True)
99
- else:
100
- browser = playwright.chromium.launch(headless=False, slow_mo=100)
101
 
102
  yield browser
103
 
@@ -125,9 +122,7 @@ def pytest_bdd_apply_tag(tag, function):
125
  return None
126
 
127
 
128
- def pytest_bdd_step_error(
129
- request, feature, scenario, step, step_func, step_func_args, exception
130
- ):
131
  """
132
  ステップが失敗した場合のフック
133
 
@@ -146,9 +141,7 @@ def pytest_bdd_step_error(
146
  step_name = step.name.replace(" ", "_")
147
  timestamp = int(time.time())
148
 
149
- screenshot_path = os.path.join(
150
- screenshot_dir, f"error_{scenario_name}_{step_name}_{timestamp}.png"
151
- )
152
 
153
  page.screenshot(path=screenshot_path)
154
  logger.error(f"スクリーンショットが保存されました: {screenshot_path}")
 
94
  Browser: Playwrightブラウザインスタンス
95
  """
96
  with sync_playwright() as playwright:
97
+ browser = playwright.chromium.launch(headless=True) if os.environ.get("HEADLESS", "true").lower() == "true" else playwright.chromium.launch(headless=False, slow_mo=100)
 
 
 
98
 
99
  yield browser
100
 
 
122
  return None
123
 
124
 
125
+ def pytest_bdd_step_error(request, feature, scenario, step, step_func, step_func_args, exception):
 
 
126
  """
127
  ステップが失敗した場合のフック
128
 
 
141
  step_name = step.name.replace(" ", "_")
142
  timestamp = int(time.time())
143
 
144
+ screenshot_path = os.path.join(screenshot_dir, f"error_{scenario_name}_{step_name}_{timestamp}.png")
 
 
145
 
146
  page.screenshot(path=screenshot_path)
147
  logger.error(f"スクリーンショットが保存されました: {screenshot_path}")
yomitalk/components/audio_generator.py CHANGED
@@ -70,13 +70,8 @@ class VoicevoxCoreManager:
70
  self.core_initialized = False
71
 
72
  # 1. Check existence of required directories
73
- if (
74
- not self.VOICEVOX_MODELS_PATH.exists()
75
- or not self.VOICEVOX_DICT_PATH.exists()
76
- ):
77
- logger.warning(
78
- "Required VOICEVOX directories not found. Please run 'make download-voicevox-core'"
79
- )
80
  return
81
 
82
  try:
@@ -84,9 +79,7 @@ class VoicevoxCoreManager:
84
  open_jtalk = self._initialize_openjtalk()
85
 
86
  # 3. Initialize ONNX Runtime
87
- runtime_path = str(
88
- self.VOICEVOX_LIB_PATH / "libvoicevox_onnxruntime.so.1.17.3"
89
- )
90
 
91
  if os.path.exists(runtime_path):
92
  logger.info("Loading ONNX runtime from local path")
@@ -102,9 +95,7 @@ class VoicevoxCoreManager:
102
  loaded_count = self._load_voice_models()
103
 
104
  if loaded_count > 0:
105
- logger.info(
106
- f"Successfully loaded {loaded_count}/{len(REQUIRED_MODEL_FILES)} voice models"
107
- )
108
  self.core_initialized = True
109
  else:
110
  logger.error("No voice models could be loaded")
@@ -229,16 +220,12 @@ class VoicevoxCoreManager:
229
  if original_surface != word.surface:
230
  self.user_dict_words.add(original_surface)
231
 
232
- logger.debug(
233
- f"Loaded user dict word: {word.surface} (original: {original_surface})"
234
- )
235
 
236
  except Exception as e:
237
  logger.warning(f"Failed to load user dictionary words: {e}")
238
 
239
- logger.info(
240
- f"Loaded {len(self.user_dict_words)} user dictionary surface forms for conversion checking"
241
- )
242
 
243
  def is_word_in_user_dict(self, word: str) -> bool:
244
  """
@@ -386,12 +373,8 @@ class AudioGenerator:
386
  If not provided, defaults to "data/temp/talks"
387
  """
388
  # Use session-specific directories if provided
389
- self.output_dir = (
390
- session_output_dir if session_output_dir else Path("data/output")
391
- )
392
- self.temp_dir = (
393
- session_temp_dir if session_temp_dir else Path("data/temp/talks")
394
- )
395
 
396
  # Make sure directories exist
397
  self.output_dir.mkdir(parents=True, exist_ok=True)
@@ -454,9 +437,7 @@ class AudioGenerator:
454
  result.extend([uppercase_part, "ズ"])
455
  else:
456
  # 英単語のパターンに基づいて分割(キャメルケース対応)
457
- segments = re.findall(
458
- r"([A-Z]{2,}(?=[A-Z][a-z]|$)|[A-Z][a-z]*|[a-z]+)", part
459
- )
460
  result.extend(segments)
461
  else:
462
  # 英単語以外はそのまま追加
@@ -509,11 +490,7 @@ class AudioGenerator:
509
  needs_space = word_count >= 6 # 6単語以上続く
510
 
511
  # 特定の品詞の前後で息継ぎ
512
- if (
513
- last_part.lower() in self.BE_VERBS
514
- or part.lower() in self.PREPOSITIONS
515
- or part.lower() in self.CONJUNCTIONS
516
- ) and word_count >= 4:
517
  needs_space = True
518
 
519
  if needs_space:
@@ -530,9 +507,7 @@ class AudioGenerator:
530
  elif not is_english_word:
531
  # 英単語でない場合はそのまま
532
  part_to_add = part
533
- elif is_all_uppercase and (
534
- len(part) <= 3 or (len(part) <= 6 and not is_romaji_readable(part))
535
- ):
536
  # 大文字のみで構成され、字数が少なくてローマ字読みできない場合はアルファベット読みして欲しいためそのまま
537
  # (字数が3文字以下なら基本的にアルファベット読みで良く, 駄目であればCONVERSION_OVERRIDEなどで変換する)
538
  part_to_add = part
@@ -546,9 +521,7 @@ class AudioGenerator:
546
 
547
  return "".join(result)
548
 
549
- def generate_character_conversation(
550
- self, podcast_text: str
551
- ) -> Generator[Optional[str], None, None]:
552
  """
553
  Generate audio for a character conversation from podcast text with streaming support.
554
 
@@ -614,10 +587,7 @@ class AudioGenerator:
614
  conversation_parts = []
615
 
616
  # キャラクターパターンを取得
617
- character_patterns = {
618
- char.display_name: [f"{char.display_name}:", f"{char.display_name}:"]
619
- for char in Character
620
- }
621
 
622
  # 複数行のセリフを処理するために現在の話者と発言を記録
623
  current_speaker = None
@@ -663,9 +633,7 @@ class AudioGenerator:
663
 
664
  # 会話部分が見つからない場合はフォーマット修正を試みる
665
  if not conversation_parts:
666
- logger.warning(
667
- "No valid conversation parts found. Attempting to fix format..."
668
- )
669
  fixed_text = self._fix_conversation_format(podcast_text)
670
  if fixed_text != podcast_text:
671
  return self._extract_conversation_parts(fixed_text)
@@ -801,10 +769,9 @@ class AudioGenerator:
801
  # 現在の話者の発言として処理
802
  if line_stripped:
803
  current_speech.append(line_stripped)
804
- elif current_speech:
805
  # 段落区切りの空行
806
- if not current_speech[-1].endswith("\n"):
807
- current_speech[-1] += "\n"
808
  elif line_stripped:
809
  # 話者が一度も検出されていない場合はデフォルト設定
810
  current_speaker = Character.ZUNDAMON.display_name
 
70
  self.core_initialized = False
71
 
72
  # 1. Check existence of required directories
73
+ if not self.VOICEVOX_MODELS_PATH.exists() or not self.VOICEVOX_DICT_PATH.exists():
74
+ logger.warning("Required VOICEVOX directories not found. Please run 'make download-voicevox-core'")
 
 
 
 
 
75
  return
76
 
77
  try:
 
79
  open_jtalk = self._initialize_openjtalk()
80
 
81
  # 3. Initialize ONNX Runtime
82
+ runtime_path = str(self.VOICEVOX_LIB_PATH / "libvoicevox_onnxruntime.so.1.17.3")
 
 
83
 
84
  if os.path.exists(runtime_path):
85
  logger.info("Loading ONNX runtime from local path")
 
95
  loaded_count = self._load_voice_models()
96
 
97
  if loaded_count > 0:
98
+ logger.info(f"Successfully loaded {loaded_count}/{len(REQUIRED_MODEL_FILES)} voice models")
 
 
99
  self.core_initialized = True
100
  else:
101
  logger.error("No voice models could be loaded")
 
220
  if original_surface != word.surface:
221
  self.user_dict_words.add(original_surface)
222
 
223
+ logger.debug(f"Loaded user dict word: {word.surface} (original: {original_surface})")
 
 
224
 
225
  except Exception as e:
226
  logger.warning(f"Failed to load user dictionary words: {e}")
227
 
228
+ logger.info(f"Loaded {len(self.user_dict_words)} user dictionary surface forms for conversion checking")
 
 
229
 
230
  def is_word_in_user_dict(self, word: str) -> bool:
231
  """
 
373
  If not provided, defaults to "data/temp/talks"
374
  """
375
  # Use session-specific directories if provided
376
+ self.output_dir = session_output_dir if session_output_dir else Path("data/output")
377
+ self.temp_dir = session_temp_dir if session_temp_dir else Path("data/temp/talks")
 
 
 
 
378
 
379
  # Make sure directories exist
380
  self.output_dir.mkdir(parents=True, exist_ok=True)
 
437
  result.extend([uppercase_part, "ズ"])
438
  else:
439
  # 英単語のパターンに基づいて分割(キャメルケース対応)
440
+ segments = re.findall(r"([A-Z]{2,}(?=[A-Z][a-z]|$)|[A-Z][a-z]*|[a-z]+)", part)
 
 
441
  result.extend(segments)
442
  else:
443
  # 英単語以外はそのまま追加
 
490
  needs_space = word_count >= 6 # 6単語以上続く
491
 
492
  # 特定の品詞の前後で息継ぎ
493
+ if (last_part.lower() in self.BE_VERBS or part.lower() in self.PREPOSITIONS or part.lower() in self.CONJUNCTIONS) and word_count >= 4:
 
 
 
 
494
  needs_space = True
495
 
496
  if needs_space:
 
507
  elif not is_english_word:
508
  # 英単語でない場合はそのまま
509
  part_to_add = part
510
+ elif is_all_uppercase and (len(part) <= 3 or (len(part) <= 6 and not is_romaji_readable(part))):
 
 
511
  # 大文字のみで構成され、字数が少なくてローマ字読みできない場合はアルファベット読みして欲しいためそのまま
512
  # (字数が3文字以下なら基本的にアルファベット読みで良く, 駄目であればCONVERSION_OVERRIDEなどで変換する)
513
  part_to_add = part
 
521
 
522
  return "".join(result)
523
 
524
+ def generate_character_conversation(self, podcast_text: str) -> Generator[Optional[str], None, None]:
 
 
525
  """
526
  Generate audio for a character conversation from podcast text with streaming support.
527
 
 
587
  conversation_parts = []
588
 
589
  # キャラクターパターンを取得
590
+ character_patterns = {char.display_name: [f"{char.display_name}:", f"{char.display_name}:"] for char in Character}
 
 
 
591
 
592
  # 複数行のセリフを処理するために現在の話者と発言を記録
593
  current_speaker = None
 
633
 
634
  # 会話部分が見つからない場合はフォーマット修正を試みる
635
  if not conversation_parts:
636
+ logger.warning("No valid conversation parts found. Attempting to fix format...")
 
 
637
  fixed_text = self._fix_conversation_format(podcast_text)
638
  if fixed_text != podcast_text:
639
  return self._extract_conversation_parts(fixed_text)
 
769
  # 現在の話者の発言として処理
770
  if line_stripped:
771
  current_speech.append(line_stripped)
772
+ elif current_speech and not current_speech[-1].endswith("\n"):
773
  # 段落区切りの空行
774
+ current_speech[-1] += "\n"
 
775
  elif line_stripped:
776
  # 話者が一度も検出されていない場合はデフォルト設定
777
  current_speaker = Character.ZUNDAMON.display_name
yomitalk/components/content_extractor.py CHANGED
@@ -75,9 +75,7 @@ class ContentExtractor:
75
  return f"URL conversion error: {str(e)}"
76
 
77
  @classmethod
78
- def extract_file_content(
79
- cls, file_obj: Any
80
- ) -> Tuple[Optional[str], Optional[bytes]]:
81
  """
82
  メモリ上でファイルコンテンツを抽出します。
83
 
@@ -99,9 +97,7 @@ class ContentExtractor:
99
  original_extension = ".txt" # デフォルト拡張子
100
  if hasattr(file_obj, "name"):
101
  # 元のファイルの拡張子を取得
102
- original_extension = os.path.splitext(Path(file_obj.name).name)[
103
- 1
104
- ].lower()
105
  # 拡張子がない場合はデフォルト値を使用
106
  if not original_extension:
107
  original_extension = ".txt"
@@ -110,10 +106,7 @@ class ContentExtractor:
110
  file_content = None
111
  if hasattr(file_obj, "read") and callable(file_obj.read):
112
  # 現在位置を記録
113
- if hasattr(file_obj, "tell") and callable(file_obj.tell):
114
- pos = file_obj.tell()
115
- else:
116
- pos = 0
117
 
118
  # コンテンツを読み込み
119
  file_content = file_obj.read()
@@ -205,9 +198,7 @@ class ContentExtractor:
205
 
206
  # メモリ上のPDFストリームを直接変換
207
  logger.debug("Processing PDF from memory stream")
208
- result = _markdown_converter.convert(
209
- pdf_stream, stream_info=stream_info
210
- )
211
 
212
  # 変換結果からテキストコンテンツを取得
213
  markdown_content = result.text_content
@@ -221,9 +212,7 @@ class ContentExtractor:
221
  return f"Unsupported file type: {file_ext}. Supported types: {', '.join(cls.SUPPORTED_EXTENSIONS)}"
222
 
223
  @classmethod
224
- def append_text_with_source(
225
- cls, existing_text: str, new_text: str, source: str, add_separator: bool = True
226
- ) -> str:
227
  """
228
  Append new text to existing text with source information.
229
 
@@ -245,18 +234,10 @@ class ContentExtractor:
245
  if add_separator:
246
  # Create markdown-style separator with source information
247
  separator = f"\n\n---\n**Source: {source}**\n\n"
248
- if existing_text.strip():
249
- # If there's existing text, add separator before new content
250
- result = existing_text.rstrip() + separator + content_to_append
251
- else:
252
- # If no existing text, add source info at the beginning
253
- result = f"**Source: {source}**\n\n" + content_to_append
254
  else:
255
  # Just append with minimal spacing
256
- if existing_text.strip():
257
- result = existing_text.rstrip() + "\n\n" + content_to_append
258
- else:
259
- result = content_to_append
260
 
261
  return result
262
 
 
75
  return f"URL conversion error: {str(e)}"
76
 
77
  @classmethod
78
+ def extract_file_content(cls, file_obj: Any) -> Tuple[Optional[str], Optional[bytes]]:
 
 
79
  """
80
  メモリ上でファイルコンテンツを抽出します。
81
 
 
97
  original_extension = ".txt" # デフォルト拡張子
98
  if hasattr(file_obj, "name"):
99
  # 元のファイルの拡張子を取得
100
+ original_extension = os.path.splitext(Path(file_obj.name).name)[1].lower()
 
 
101
  # 拡張子がない場合はデフォルト値を使用
102
  if not original_extension:
103
  original_extension = ".txt"
 
106
  file_content = None
107
  if hasattr(file_obj, "read") and callable(file_obj.read):
108
  # 現在位置を記録
109
+ pos = file_obj.tell() if hasattr(file_obj, "tell") and callable(file_obj.tell) else 0
 
 
 
110
 
111
  # コンテンツを読み込み
112
  file_content = file_obj.read()
 
198
 
199
  # メモリ上のPDFストリームを直接変換
200
  logger.debug("Processing PDF from memory stream")
201
+ result = _markdown_converter.convert(pdf_stream, stream_info=stream_info)
 
 
202
 
203
  # 変換結果からテキストコンテンツを取得
204
  markdown_content = result.text_content
 
212
  return f"Unsupported file type: {file_ext}. Supported types: {', '.join(cls.SUPPORTED_EXTENSIONS)}"
213
 
214
  @classmethod
215
+ def append_text_with_source(cls, existing_text: str, new_text: str, source: str, add_separator: bool = True) -> str:
 
 
216
  """
217
  Append new text to existing text with source information.
218
 
 
234
  if add_separator:
235
  # Create markdown-style separator with source information
236
  separator = f"\n\n---\n**Source: {source}**\n\n"
237
+ result = existing_text.rstrip() + separator + content_to_append if existing_text.strip() else f"**Source: {source}**\n\n" + content_to_append
 
 
 
 
 
238
  else:
239
  # Just append with minimal spacing
240
+ result = existing_text.rstrip() + "\n\n" + content_to_append if existing_text.strip() else content_to_append
 
 
 
241
 
242
  return result
243