"""Helpers for listing processed book XML files and reading their chapters."""

import logging
import os
import xml.etree.ElementTree as ET
from typing import Dict, List, Tuple

# Unused in this module; kept in case other code imports it from here.
from .text_utils import count_tokens  # noqa: F401

logger = logging.getLogger(__name__)

# Default directory scanned for book XML files (relative to the process's
# current working directory).
PROCESSED_DIR = "texts/processed"

# File extension that marks a book file.
_XML_SUFFIX = ".xml"


def get_available_books(processed_dir: str = PROCESSED_DIR) -> List[Dict[str, str]]:
    """Get list of available book XML files.

    Args:
        processed_dir: Directory to scan. Defaults to ``PROCESSED_DIR``.

    Returns:
        List of dicts, sorted by filename, with keys:
            - value: filename with extension (for internal use)
            - label: display name without extension

    Raises:
        OSError: If ``processed_dir`` does not exist or cannot be listed
            (propagated from ``os.listdir``).
    """
    books = []
    logger.info("Checking directory: %s", processed_dir)
    # os.listdir returns entries in arbitrary order; sort so callers get a
    # stable, reproducible list.
    for filename in sorted(os.listdir(processed_dir)):
        logger.info("Found file: %s", filename)
        if not filename.endswith(_XML_SUFFIX):
            continue
        books.append({
            'value': filename,
            # Drop the ".xml" extension for display.
            'label': filename[:-len(_XML_SUFFIX)],
        })
    return books


def get_book_info(xml_path: str) -> Tuple[str, List[Dict]]:
    """Get book title and chapter information from an XML file.

    Only ``chapter`` elements that are direct children of the root element
    are read. For each one, the element text is stripped and its first line
    is discarded; text that consists of a single line is kept unchanged.

    Args:
        xml_path: Path to the book XML file.

    Returns:
        Tuple containing:
            - Book title: the root element's ``title`` attribute
              (``None`` when the attribute is absent)
            - List of chapter dicts with keys: id, title, text

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    book_title = root.get('title')

    chapters = []
    for chapter in root.findall('chapter'):
        raw_text = chapter.text.strip() if chapter.text else ""
        # Remove the first line. When there is no newline at all, the text
        # is a single line and is kept as-is.
        _, newline, remainder = raw_text.partition("\n")
        body = remainder.strip() if newline else raw_text
        chapters.append({
            'id': chapter.get('id'),
            'title': chapter.get('title'),
            'text': body,
        })

    return book_title, chapters


def get_chapter_text(xml_path: str, chapter_id: str) -> str:
    """Get text content for a specific chapter.

    Args:
        xml_path: Path to the book XML file.
        chapter_id: Value of the ``id`` attribute of the wanted chapter.

    Returns:
        The text of the first chapter whose id equals ``chapter_id``, or an
        empty string when no chapter matches.
    """
    _, chapters = get_book_info(xml_path)
    return next(
        (chapter['text'] for chapter in chapters if chapter['id'] == chapter_id),
        "",
    )


def get_book_chapters(xml_path: str) -> List[Dict]:
    """Get list of chapters with id and title for dropdown.

    Args:
        xml_path: Path to the book XML file.

    Returns:
        One dict per chapter, in document order, with keys: id, title.
    """
    _, chapters = get_book_info(xml_path)
    return [{'id': ch['id'], 'title': ch['title']} for ch in chapters]