Omarrran committed on
Commit
2a831b5
·
verified ·
1 Parent(s): 6ab1122

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # clean_app.py
2
+
3
+ # -----------------------------------
4
+ # Copy_right CC _developed By HNM
5
+ # -----------------------------------
6
+ import re
7
+ import gradio as gr
8
+ import tempfile
9
+
10
+ # STEP A
11
# Replacement table consumed by basic_replacements(); entries are applied
# one after another in the order they are listed here.
mappings = {
    # A \x04 control character followed by a mark/letter: keep the
    # mark/letter and drop the control character.
    "\x04ٲ": "ٲ",
    "\x04ُ": "ُ",
    "\x04ٚ": "ٚ",
    "\x04ٕ": "ٕ",
    "\x04ٛ": "ٛ",
    "\x04ٔ": "ٔ",
    "\x04ں": "ں",
    # Any \x04 that is still left on its own is simply removed.
    "\x04": "",

    # ASCII punctuation that stands in for the same marks/letters.
    ">": "ٲ",
    "<": "ُ",
    ";": "ٚ",
    "=": "ٕ",
    ":": "ٛ",
    ".": "ٔ",
    ",": "ں",
    # Slashes are deleted outright.
    "/": "",
}
31
+
32
def basic_replacements(text: str) -> str:
    """
    Run every substitution listed in the module-level ``mappings`` table
    over ``text`` and return the result.

    The entries are applied sequentially, in the table's own order, each
    one as a plain ``str.replace`` on the output of the previous one.
    """
    result = text
    for needle in mappings:
        result = result.replace(needle, mappings[needle])
    return result
40
+
41
+ # STEP B
42
def fix_alif_combo(text: str) -> str:
    """Collapse every 'اٲ' pair in ``text`` down to a single 'ٲ'."""
    doubled = "اٲ"
    single = "ٲ"
    return text.replace(doubled, single)
45
+
46
+ # STEP C
47
def fix_question_mark(text: str) -> str:
    """
    Remove every '?' that sits between two characters and append "یٕ"
    right after the character that followed it.

    E.g. "س?ت" => "ستیٕ".

    The character before the '?' is only checked with a lookbehind, not
    consumed by the match. This lets chained occurrences that share a
    character all be rewritten ("a?b?c" => "abیٕcیٕ"); consuming the
    preceding character would make the second '?' unmatchable.

    A '?' at the very start or very end of ``text``, or directly next to
    a newline, has no neighbour on one side and is left untouched.

    Args:
        text: The string to process.

    Returns:
        The string with each qualifying '?' rewritten as described.
    """
    # (?<=.)  -> some character must precede the '?', but is not consumed
    # \?      -> the literal question mark to drop
    # (.)     -> the following character, re-emitted with the suffix
    pattern = r"(?<=.)\?(.)"
    return re.sub(pattern, lambda m: f"{m.group(1)}یٕ", text)
59
+
60
+ # STEP D
61
def clean_line(line: str) -> str:
    """
    Run one line of text through the cleaning pipeline.

    The stages run in this fixed order, each receiving the previous
    stage's output:
      1) basic_replacements  (\\x04 combos, ASCII symbols)
      2) fix_alif_combo      (اٲ -> ٲ)
      3) fix_question_mark   (س?ت -> ستیٕ)
    """
    pipeline = (basic_replacements, fix_alif_combo, fix_question_mark)
    for stage in pipeline:
        line = stage(line)
    return line
72
+
73
+ # Helper: cleans the entire string (multiple lines).
74
def clean_text(input_text: str) -> str:
    """
    Clean a multi-line string by passing each line through clean_line().

    The text is split with ``str.splitlines()`` and the cleaned lines are
    re-joined with "\\n", so the result has no trailing line break.
    """
    return "\n".join(map(clean_line, input_text.splitlines()))
79
+
80
+
81
+ # -----------------------------------
82
+ # Gradio Interface
83
+ # -----------------------------------
84
def process_text(raw_text):
    """
    Gradio callback for the "Clean Text" button.

    Args:
        raw_text: The contents of the input textbox. ``None`` (no value)
            is treated the same as an empty string.

    Returns:
        A ``(cleaned, path)`` tuple:
          1) the cleaned text, for display in the output textbox;
          2) the path of a UTF-8 ``.txt`` temporary file holding the same
             cleaned text, for the download component.
    """
    # Guard against a missing value so clean_text() always gets a str.
    cleaned = clean_text(raw_text or "")

    # delete=False keeps the file on disk after it is closed so that it
    # can still be served for download. The context manager guarantees
    # the handle is flushed and closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(cleaned)

    return cleaned, tmp.name
100
+
101
+
102
+ # Build the interface
103
with gr.Blocks() as demo:
    # Heading and short usage instructions.
    gr.Markdown("## Clean Text Tool")
    gr.Markdown(
        "Paste your raw/unprocessed text below, then click 'Clean Text' to get the cleaned result."
    )

    # One row with two columns: raw input first, cleaned output second.
    with gr.Row():
        with gr.Column():
            raw_text = gr.Textbox(
                label="Input (Paste uncleaned text)",
                lines=15,
                placeholder="Paste any length of text here...",
            )
        with gr.Column():
            # Read-only: this box only displays the result.
            cleaned_output = gr.Textbox(
                label="Output (Cleaned text)",
                lines=15,
                interactive=False,
            )

    # Trigger for the cleaning run.
    button = gr.Button("Clean Text")

    # Download slot for the cleaned text, placed below the button.
    download_file = gr.File(label="Download Cleaned .txt File")

    # Clicking the button feeds the input box to process_text() and routes
    # its two return values to the output box and the download slot.
    button.click(
        fn=process_text,
        inputs=raw_text,
        outputs=[cleaned_output, download_file],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()