dammy committed on
Commit
301614f
·
1 Parent(s): 60a0b82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -38
app.py CHANGED
@@ -1,49 +1,26 @@
1
- # import gradio as gr
2
- # from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
3
- # from langchain.text_splitter import CharacterTextSplitter
4
-
5
-
6
- # def extract_text(pdf_file):
7
-
8
- # # Load a document
9
- # loader = PDFMinerLoader(pdf_file)
10
- # doc = loader.load()
11
-
12
- # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
13
- # texts = text_splitter.split_documents(doc)
14
-
15
- # texts = [i.page_content for i in texts]
16
-
17
- # return texts[0]
18
-
19
-
20
- # # def upload_file(file):
21
- # # return file.name
22
-
23
- # # with gr.Blocks() as demo:
24
- # # file_output = gr.File()
25
- # # upload_button = gr.UploadButton("Click to Upload a File", file_types="file")
26
- # # upload_button.upload(upload_file, upload_button, file_output)
27
-
28
-
29
- # gr.inputs.File(label="upload file")
30
 
31
- # iface = gr.Interface(
32
- # fn=extract_text,
33
- # inputs=gr.File(type="filepath", label="Upload PDF"),
34
- # outputs="text"
35
- # )
36
 
37
- # iface.launch()
38
 
39
  import gradio as gr
40
 
41
  def upload_pdf(file):
42
  # Save the uploaded file
43
  file_name = file.name
44
- # file.save(file_name)
45
-
46
- return file_name
 
 
 
 
 
 
 
 
 
47
 
48
  iface = gr.Interface(
49
  fn=upload_pdf,
 
1
+ import gradio as gr
2
+ from langchain.document_loaders import PDFMinerLoader, PyMuPDFLoader
3
+ from langchain.text_splitter import CharacterTextSplitter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
 
 
5
 
 
6
 
7
  import gradio as gr
8
 
9
  def upload_pdf(file):
10
  # Save the uploaded file
11
  file_name = file.name
12
+ pdf_filename = os.path.basename(file_path)
13
+
14
+ # Load a document
15
+ loader = PDFMinerLoader(pdf_filename)
16
+ doc = loader.load()
17
+
18
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
19
+ texts = text_splitter.split_documents(doc)
20
+
21
+ texts = [i.page_content for i in texts]
22
+
23
+ return texts[0]
24
 
25
  iface = gr.Interface(
26
  fn=upload_pdf,