Omarrran's picture
Create app.py
2a831b5 verified
raw
history blame
3.69 kB
# clean_app.py
# -----------------------------------
# Copyright CC - developed by HNM
# -----------------------------------
import re
import gradio as gr
import tempfile
# STEP A
# Substitution table used by basic_replacements(): each key is a substring to
# search for and its value is the text that replaces it. basic_replacements()
# walks the entries in the order they are written here.
mappings = {
    # \x04 followed by a character: drop the \x04 and keep the character.
    "\x04ٲ": "ٲ",
    "\x04ُ": "ُ",
    "\x04ٚ": "ٚ",
    "\x04ٕ": "ٕ",
    "\x04ٛ": "ٛ",
    "\x04ٔ": "ٔ",
    "\x04ں": "ں",
    "\x04": "",  # any \x04 still left on its own is removed
    # ASCII symbol mappings: each ASCII symbol stands in for one character.
    ">": "ٲ",
    "<": "ُ",
    ";": "ٚ",
    "=": "ٕ",
    ":": "ٛ",
    ".": "ٔ",
    ",": "ں",
    "/": ""  # remove slash
}
def basic_replacements(text: str) -> str:
    """Run every substitution from the module-level ``mappings`` table.

    The entries are applied one after another, in table order, each as a
    plain ``str.replace`` over the whole string. This covers both the
    \\x04 combinations and the ASCII stand-in symbols.

    Returns the text after the last substitution.
    """
    result = text
    for needle in mappings:
        result = result.replace(needle, mappings[needle])
    return result
# STEP B
def fix_alif_combo(text: str) -> str:
    """Collapse every 'اٲ' pair into a single 'ٲ' and return the result."""
    doubled = "اٲ"
    single = "ٲ"
    return text.replace(doubled, single)
# STEP C
def fix_question_mark(text: str) -> str:
    """Drop each '?' that sits between two characters and mark the next letter.

    For every '?' that has a character on both sides, the '?' is removed and
    "یٕ" is appended after the character that followed it,
    e.g. "س?ت" => "ستیٕ".

    A '?' at the very start or end of the text, or directly next to a
    newline, is left unchanged.

    Args:
        text: The text to process.

    Returns:
        The text with every qualifying '?' rewritten.
    """
    # The preceding character is only *checked* with a look-behind rather than
    # consumed. Consuming it would make adjacent occurrences that share a
    # character (e.g. "a?b?c") overlap, and re.sub would then skip all but the
    # first of them.
    return re.sub(r"(?<=.)\?(.)", r"\1یٕ", text)
# STEP D
def clean_line(line: str) -> str:
    """Clean one line by running the three cleaning steps in sequence.

    Steps, in order:
      1) basic_replacements (\\x04 combos, ASCII symbols),
      2) fix_alif_combo (اٲ -> ٲ),
      3) fix_question_mark (س?ت -> ستیٕ)

    Each step receives the output of the previous one.
    """
    for step in (basic_replacements, fix_alif_combo, fix_question_mark):
        line = step(line)
    return line
# Helper: cleans the entire string (multiple lines).
def clean_text(input_text: str) -> str:
    """Clean a multi-line string line by line.

    The text is split with ``str.splitlines()``, every line goes through
    ``clean_line``, and the cleaned lines are joined back with "\\n".
    """
    return "\n".join(map(clean_line, input_text.splitlines()))
# -----------------------------------
# Gradio Interface
# -----------------------------------
def process_text(raw_text):
    """Clean the submitted text and save a downloadable copy.

    This function is wired to the "Clean Text" button of the Gradio
    interface.

    Args:
        raw_text: The uncleaned text from the input box. A missing value
            (``None``) is treated as empty text.

    Returns:
        A ``(cleaned, path)`` tuple:
          1) the cleaned text (for display),
          2) the path of a temporary UTF-8 ``.txt`` file containing the
             cleaned text (for download).
    """
    cleaned = clean_text(raw_text or "")
    # delete=False keeps the file on disk after it is closed so it can still
    # be read through the returned path. The `with` block guarantees the
    # handle is flushed and closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(cleaned)
    return cleaned, tmp.name
# Build the interface
# NOTE(review): the nesting below is reconstructed. The button, file widget
# and click wiring are assumed to sit directly under gr.Blocks(), below the
# two-column row - confirm against the deployed layout.
with gr.Blocks() as demo:
    # Title and short usage instructions.
    gr.Markdown("## Clean Text Tool")
    gr.Markdown(
        "Paste your raw/unprocessed text below, then click 'Clean Text' to get the cleaned result."
    )
    # One row holding two columns: the input box and the output box.
    with gr.Row():
        with gr.Column():
            raw_text = gr.Textbox(
                label="Input (Paste uncleaned text)",
                lines=15,
                placeholder="Paste any length of text here...",
            )
        with gr.Column():
            # The output box is created with interactive=False.
            cleaned_output = gr.Textbox(
                label="Output (Cleaned text)",
                lines=15,
                interactive=False
            )
    # Button to trigger cleaning
    button = gr.Button("Clean Text")
    # File component that receives the path returned by process_text
    download_file = gr.File(label="Download Cleaned .txt File")
    # Connect the function to the button: process_text receives the input
    # box's value, and its two return values go to the output box and the
    # file component, in that order.
    button.click(
        fn=process_text,
        inputs=raw_text,
        outputs=[cleaned_output, download_file]
    )
# Run the app
if __name__ == "__main__":
    # Launch the interface only when this file is run as a script, not when
    # it is imported as a module.
    demo.launch()