Spaces:
Runtime error
Runtime error
File size: 6,187 Bytes
7cf68b3 6f6b132 7cf68b3 6f6b132 7cf68b3 6f6b132 0a0c342 6f6b132 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from transformers import (
AutoTokenizer,
AutoModelForSeq2SeqLM,
pipeline,
GenerationConfig
)
from textwrap import dedent
class lamini:
    """Factory for the MBZUAI/LaMini-Flan-T5-248M model wrapped as a LangChain LLM."""

    def __init__(self):
        pass

    def load_model(self, task="text2text-generation", **kwargs) -> HuggingFacePipeline:
        """Build a transformers pipeline for LaMini-Flan-T5-248M and wrap it for LangChain.

        Args:
            task (str): transformers pipeline task name. The original code
                accepted this parameter but ignored it, hard-coding
                "text2text-generation"; it is now forwarded to ``pipeline``.
                The default preserves the old behavior.
            **kwargs: optional generation overrides —
                max_length (default 512), temperature (default 0),
                top_p (default 0.95), repetition_penalty (default 1.15).

        Returns:
            HuggingFacePipeline: LangChain-compatible LLM wrapper around the
            loaded model/tokenizer pipeline.
        """
        model_id = "MBZUAI/LaMini-Flan-T5-248M"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        gen_config = GenerationConfig.from_pretrained(model_id)
        pipe = pipeline(
            task,  # honor the caller's task instead of the hard-coded literal
            model=model,
            tokenizer=tokenizer,
            generation_config=gen_config,
            max_length=kwargs.get("max_length", 512),
            top_p=kwargs.get("top_p", 0.95),
            temperature=kwargs.get("temperature", 0),
            repetition_penalty=kwargs.get("repetition_penalty", 1.15),
        )
        return HuggingFacePipeline(pipeline=pipe)
class templates:
    """Instruction-prefix prompt helpers around a loaded LLM pipeline."""

    def __init__(self, llm: "HuggingFacePipeline"):
        # Annotation quoted so the class is definable even when langchain
        # is not importable at class-creation time.
        self.llm = llm

    def summarize(self, text, **kwargs):
        """Summarize *text* with the underlying LLM.

        Args:
            text (str): text to summarize.
            **kwargs: forwarded to the LLM call.

        Returns:
            str: summarized text.
        """
        instruction = "summarize for better understanding: "
        return self.llm(instruction + text, **kwargs)

    def generate_title(self, text, **kwargs):
        """Generate a title for *text* with the underlying LLM.

        Args:
            text (str): text to generate a title for.
            **kwargs: forwarded to the LLM call.

        Returns:
            str: generated title.
        """
        instruction = "generate a title for this text: "
        return self.llm(instruction + text, **kwargs)

    # Backward-compatible alias: the original method name had a typo
    # ("tile" for "title"); existing callers keep working.
    generate_tile = generate_title
class qa_template:
    """Retrieval-QA over a text knowledge base, with an optional Gradio chat UI."""

    def __init__(self, llm):
        # Imported lazily so importing this module does not require langchain.
        from langchain.chains.retrieval_qa.base import BaseRetrievalQA

        self.llm = llm
        # NOTE: this is only a type annotation — qa_inf is NOT assigned until
        # load() runs. Submitting a question before load() completes raises
        # AttributeError.
        self.qa_inf: BaseRetrievalQA

    def load(self, knowledge_base):
        """Chunk and embed *knowledge_base*, then build the retrieval-QA chain.

        Args:
            knowledge_base (str): raw text to index.

        Returns:
            BaseRetrievalQA: the QA interface (also stored on ``self.qa_inf``).
        """
        from utils import LangChainChunker
        from langchain.vectorstores import Chroma
        from langchain.chains import RetrievalQA

        embeds = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
        chunker = LangChainChunker(knowledge_base)
        chunks = chunker.chunker(size=512)
        db = Chroma.from_texts(chunks, embeds)
        retriever = db.as_retriever()
        qa_inf = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff", retriever=retriever
        )
        self.qa_inf = qa_inf
        return qa_inf

    def start_gradio(self, title: str):
        """Launch a blocking Gradio UI: video-ID box, chat box, instructions.

        Args:
            title (str): currently unused in the rendered UI — it was only
                referenced by dead (now removed) ChatInterface code.
                TODO(review): wire it into the page header or drop it.
        """
        import gradio as gr

        load = self.load

        def interface(msg, history):
            # Run the question through the QA chain; append the exchange to
            # the chat history and clear the textbox (empty-string return).
            res = self.qa_inf.run(msg)
            history.append((msg, res))
            return "", history

        def reload(video_id):
            # Fetch subtitles for the given YouTube video and (re)build the
            # knowledge base. Blocks the UI while embedding runs.
            from utils import getSubsText

            print(f"Setting up {video_id}")
            subs = getSubsText(video_id)
            _ = load(subs)

        with gr.Blocks() as demo:
            with gr.Column():
                gr.Markdown(dedent(f"""
                # video to QA
                A test implementation to use vectorstores and mini llms to create
                a question answer chatbot interface for _youtube videos_
                """))
                chatbot = gr.Chatbot()
                with gr.Row():
                    with gr.Column():
                        videoId = gr.Textbox(label="Video ID", placeholder="Enter video ID here")
                        msg = gr.Textbox(label="Question Box" , placeholder="Enter your question here")
                clear = gr.ClearButton([msg, videoId, chatbot])
                gr.Markdown(
                    dedent("""
                    ## Getting started
                    to start up you need to enter the video ID of youtube video first
                    Get a youtube video which has English dialog
                    > ex: https://www.youtube.com/watch?v=BsnCpESUEqM
                    in this `BsnCpESUEqM` is the video ID
                    ```
                    https://www.youtube.com/watch?v=BsnCpESUEqM
                                               ^^^^^^^^^^^
                                                video_id
                    ```
                    > in url paramets are seperated by `?` and for video id its `?v`
                    copy-paste the video id to the textbox and press return/enter and wait ~5 seconds to fetch video information
                    ---
                    Now in the Question Box _box_/feild start typing the quesions and press return/enter to send to llm
                    """)
                )
            msg.submit(interface, [msg, chatbot], [msg, chatbot])
            videoId.submit(reload, [videoId])
        demo.launch()
|