Spaces:

bardicreels
/

rag

Running

user committed on Sep 11, 2024

Commit

1591490

1 Parent(s): 176bc9a

updates

Files changed (2) hide show

README.md CHANGED Viewed

@@ -1,8 +1,7 @@
 ---
-title: Rag PDF Chatbot
 emoji: 🤖
 colorFrom: blue
 colorTo: red
 sdk: static
-app_port: 7860
 ---

 ---
+title: rag
 emoji: 🤖
 colorFrom: blue
 colorTo: red
 sdk: static
 ---

Untitled-2 DELETED Viewed

@@ -1,23 +0,0 @@
-import fitz
-import faiss
-import numpy as np
-import torch
-from model_loader import load_model
-def extract_text_from_pdf(file_path):
-    with fitz.open(file_path) as doc:
-        return " ".join(page.get_text() for page in doc)
-def process_pdf(pdf_text):
-    chunks = [pdf_text[i:i+512] for i in range(0, len(pdf_text), 512)]
-    tokenizer, model = load_model()
-    embeddings = []
-    for chunk in chunks:
-        inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
-        with torch.no_grad():
-            outputs = model(**inputs)
-        embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
-    embeddings = np.array(embeddings)
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(embeddings.astype('float32'))
-    return chunks, index