# clean_app.py
# -----------------------------------
# Copy_right CC _developed By HNM
# -----------------------------------
import re
import tempfile

import gradio as gr

# STEP A
# NOTE: insertion order matters. basic_replacements() applies these entries
# one after another, so the two-character "\x04" + letter sequences are
# handled before the bare "\x04" entry strips whatever is left.
mappings = {
    "\x04ٲ": "ٲ",
    "\x04ُ": "ُ",
    "\x04ٚ": "ٚ",
    "\x04ٕ": "ٕ",
    "\x04ٛ": "ٛ",
    "\x04ٔ": "ٔ",
    "\x04ں": "ں",
    "\x04": "",  # if stray \x04 alone, remove or replace as needed
    # ASCII symbol mappings
    ">": "ٲ",
    "<": "ُ",
    ";": "ٚ",
    "=": "ٕ",
    ":": "ٛ",
    ".": "ٔ",
    ",": "ں",
    "/": "",  # remove slash
}

# Matches a '?' that has a (non-newline) character on both sides. The
# preceding character is only looked at, not consumed, so chained input such
# as "a?b?c" has every '?' rewritten instead of only the first one.
_QUESTION_MARK_RE = re.compile(r"(?<=.)\?(.)")


def basic_replacements(text: str) -> str:
    """Apply every entry of ``mappings`` to *text* with ``str.replace``.

    Entries are applied in the dictionary's insertion order: first the
    0x04 control-character combinations, then the ASCII symbol mappings.

    Args:
        text: The raw text to convert.

    Returns:
        The text with all mapped sequences replaced.
    """
    for old, new in mappings.items():
        text = text.replace(old, new)
    return text


# STEP B
def fix_alif_combo(text: str) -> str:
    """Replace any occurrence of 'اٲ' with 'ٲ'."""
    return text.replace("اٲ", "ٲ")


# STEP C
def fix_question_mark(text: str) -> str:
    """Drop each '?' that sits between two characters and append "یٕ".

    For every occurrence of (.)?(.) the '?' is removed and "یٕ" is added
    after the second character, e.g. "س?ت" => "ستیٕ". A '?' at the very
    start or end of the text (or next to a newline) is left untouched.

    Args:
        text: The text to fix.

    Returns:
        The text with the '?' sequences rewritten.
    """

    def _repl(m):
        # group(1) is the character that followed the '?'
        return f"{m.group(1)}یٕ"

    return _QUESTION_MARK_RE.sub(_repl, text)


# STEP D
def clean_line(line: str) -> str:
    """Clean a single line.

    Steps, in order:
      1) basic replacements (0x04 combos, ASCII symbols),
      2) fix_alif_combo (اٲ -> ٲ),
      3) fix_question_mark (س?ت -> ستیٕ)

    Args:
        line: One line of raw text.

    Returns:
        The cleaned line.
    """
    line = basic_replacements(line)
    line = fix_alif_combo(line)
    line = fix_question_mark(line)
    return line


# Helper: cleans the entire string (multiple lines).
def clean_text(input_text: str) -> str:
    """Clean *input_text* line by line.

    The text is split with ``str.splitlines()``, each line is passed through
    ``clean_line``, and the results are joined with "\\n". As a consequence a
    trailing line break in the input is not kept in the output.

    Args:
        input_text: The raw, possibly multi-line text.

    Returns:
        The cleaned text.
    """
    return "\n".join(clean_line(line) for line in input_text.splitlines())


# -----------------------------------
# Gradio Interface
# -----------------------------------
def process_text(raw_text):
    """Clean the submitted text and write it to a downloadable file.

    This function is called by Gradio when the user clicks the button.

    Args:
        raw_text: The text from the input box.

    Returns:
        A pair of:
          1) the cleaned text (for display),
          2) the path of a temporary .txt file holding the cleaned text
             (for download).
    """
    # Defensive: treat a missing value as empty text rather than failing
    # inside clean_text().
    cleaned = clean_text(raw_text or "")

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid; the with-block guarantees the handle is
    # closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(cleaned)

    return cleaned, tmp.name


# Build the interface
with gr.Blocks() as demo:
    gr.Markdown("## Clean Text Tool")
    gr.Markdown(
        "Paste your raw/unprocessed text below, then click 'Clean Text' to get the cleaned result."
    )

    with gr.Row():
        with gr.Column():
            raw_text = gr.Textbox(
                label="Input (Paste uncleaned text)",
                lines=15,
                placeholder="Paste any length of text here...",
            )
        with gr.Column():
            cleaned_output = gr.Textbox(
                label="Output (Cleaned text)",
                lines=15,
                interactive=False,
            )

    # Button to trigger cleaning
    button = gr.Button("Clean Text")

    # We'll show the file download output in a second row
    download_file = gr.File(label="Download Cleaned .txt File")

    # Connect the function to the button
    button.click(
        fn=process_text,
        inputs=raw_text,
        outputs=[cleaned_output, download_file],
    )

# Run the app
if __name__ == "__main__":
    demo.launch()