user committed on
Commit
1591490
·
1 Parent(s): 176bc9a
Files changed (2) hide show
  1. README.md +1 -2
  2. Untitled-2 +0 -23
README.md CHANGED
@@ -1,8 +1,7 @@
1
  ---
2
- title: Rag PDF Chatbot
3
  emoji: 🤖
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: static
7
- app_port: 7860
8
  ---
 
1
  ---
2
+ title: rag
3
  emoji: 🤖
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: static
 
7
  ---
Untitled-2 DELETED
@@ -1,23 +0,0 @@
1
- import fitz
2
- import faiss
3
- import numpy as np
4
- import torch
5
- from model_loader import load_model
6
-
7
- def extract_text_from_pdf(file_path):
8
- with fitz.open(file_path) as doc:
9
- return " ".join(page.get_text() for page in doc)
10
-
11
- def process_pdf(pdf_text):
12
- chunks = [pdf_text[i:i+512] for i in range(0, len(pdf_text), 512)]
13
- tokenizer, model = load_model()
14
- embeddings = []
15
- for chunk in chunks:
16
- inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
17
- with torch.no_grad():
18
- outputs = model(**inputs)
19
- embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
20
- embeddings = np.array(embeddings)
21
- index = faiss.IndexFlatL2(embeddings.shape[1])
22
- index.add(embeddings.astype('float32'))
23
- return chunks, index