<!--
know-my-doc / docs /index.html
sjain15's picture
feat: Added know-my-doc-code
cb35b85
-->
<!DOCTYPE html>
<!-- Annotated-source page for app.py; layout and colours come from pycco.css. -->
<html lang="en">
<head>
  <!-- Encoding declaration comes first so it is seen before any other head content. -->
  <meta charset="utf-8">
  <title>app.py</title>
  <link rel="stylesheet" href="pycco.css">
</head>
<body>
<!-- #container wraps the whole page; it is closed by the last </div> before </body>. -->
<div id="container">
  <div id="background"></div>
  <!-- Title row: the name of the documented source file. -->
  <div class="section">
    <div class="docs"><h1>app.py</h1></div>
  </div>
  <!-- Empty float-clearing spacer, closed in place like the one that follows every section. -->
  <div class="clearall"></div>
<!-- Section 0: the import list of app.py followed by the nltk "punkt" download call. No prose column text. -->
<div class="section" id="section-0">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-0">#</a>
    </div>
  </div>
  <div class="code">
    <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">flask</span> <span class="kn">import</span> <span class="n">Flask</span><span class="p">,</span> <span class="n">request</span><span class="p">,</span> <span class="n">jsonify</span><span class="p">,</span> <span class="n">render_template</span>
<span class="kn">from</span> <span class="nn">langchain.chains.question_answering</span> <span class="kn">import</span> <span class="n">load_qa_chain</span>
<span class="kn">from</span> <span class="nn">langchain.document_loaders</span> <span class="kn">import</span> <span class="n">DirectoryLoader</span>
<span class="kn">from</span> <span class="nn">langchain.llms</span> <span class="kn">import</span> <span class="n">OpenAIChat</span>
<span class="kn">from</span> <span class="nn">langchain.prompts</span> <span class="kn">import</span> <span class="n">PromptTemplate</span>
<span class="kn">from</span> <span class="nn">langchain.memory</span> <span class="kn">import</span> <span class="n">ConversationBufferMemory</span>
<span class="kn">from</span> <span class="nn">langchain.document_loaders</span> <span class="kn">import</span> <span class="n">WebBaseLoader</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">from</span> <span class="nn">langchain.embeddings</span> <span class="kn">import</span> <span class="n">OpenAIEmbeddings</span>
<span class="kn">from</span> <span class="nn">langchain.text_splitter</span> <span class="kn">import</span> <span class="n">CharacterTextSplitter</span>
<span class="kn">from</span> <span class="nn">langchain.embeddings.openai</span> <span class="kn">import</span> <span class="n">OpenAIEmbeddings</span>
<span class="kn">from</span> <span class="nn">langchain.vectorstores</span> <span class="kn">import</span> <span class="n">Chroma</span>
<span class="kn">import</span> <span class="nn">nltk</span>
<span class="n">nltk</span><span class="o">.</span><span class="n">download</span><span class="p">(</span><span class="s2">&quot;punkt&quot;</span><span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 1: logging setup (INFO level via basicConfig, then a logger named after the module). -->
<div class="section" id="section-1">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-1">#</a>
    </div>
    <p>Set up logging</p>
  </div>
  <div class="code">
    <div class="highlight"><pre><span class="n">logging</span><span class="o">.</span><span class="n">basicConfig</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="vm">__name__</span><span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 2: reads config.yaml, copies the OpenAI key into the environment and builds the Flask app. -->
<!-- Whitespace inside <pre> is rendered verbatim, so the Python block indentation is part of the listing. -->
<!-- NOTE(review): the three statements after the with-block are shown at module level; confirm against app.py. -->
<div class="section" id="section-2">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-2">#</a>
    </div>
    <p>Load configuration from YAML file</p>
  </div>
  <div class="code">
    <div class="highlight"><pre><span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s2">&quot;config.yaml&quot;</span><span class="p">,</span> <span class="s2">&quot;r&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
    <span class="n">config</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">safe_load</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;OPENAI_API_KEY&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s2">&quot;openai_api_key&quot;</span><span class="p">]</span>
<span class="n">template_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="s2">&quot;templates&quot;</span><span class="p">)</span>
<span class="n">app</span> <span class="o">=</span> <span class="n">Flask</span><span class="p">(</span><span class="vm">__name__</span><span class="p">,</span> <span class="n">template_folder</span><span class="o">=</span><span class="n">template_dir</span><span class="p">,</span> <span class="n">static_folder</span><span class="o">=</span><span class="s2">&quot;static&quot;</span><span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 3: loads documents from the configured directory and web pages, splits them into chunks and indexes them in Chroma. -->
<!-- Whitespace inside <pre> is rendered verbatim, so the for-loop body carries its Python indentation. -->
<!-- NOTE(review): "result = docs + web_docs" is shown after the loop, at module level; confirm against app.py. -->
<div class="section" id="section-3">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-3">#</a>
    </div>
    <p>Load the files</p>
  </div>
  <div class="code">
    <div class="highlight"><pre><span class="n">loader</span> <span class="o">=</span> <span class="n">DirectoryLoader</span><span class="p">(</span><span class="n">config</span><span class="p">[</span><span class="s2">&quot;data_directory&quot;</span><span class="p">],</span> <span class="n">glob</span><span class="o">=</span><span class="n">config</span><span class="p">[</span><span class="s2">&quot;data_files_glob&quot;</span><span class="p">])</span>
<span class="n">docs</span> <span class="o">=</span> <span class="n">loader</span><span class="o">.</span><span class="n">load</span><span class="p">()</span>
<span class="n">webpages</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;webpages&quot;</span><span class="p">,</span> <span class="p">[])</span>
<span class="n">web_docs</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">webpage</span> <span class="ow">in</span> <span class="n">webpages</span><span class="p">:</span>
    <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Loading data from webpage </span><span class="si">{</span><span class="n">webpage</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
    <span class="n">loader</span> <span class="o">=</span> <span class="n">WebBaseLoader</span><span class="p">(</span><span class="n">webpage</span><span class="p">)</span>
    <span class="n">web_docs</span> <span class="o">+=</span> <span class="n">loader</span><span class="o">.</span><span class="n">load</span><span class="p">()</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">docs</span> <span class="o">+</span> <span class="n">web_docs</span>
<span class="n">tone</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;tone&quot;</span><span class="p">,</span> <span class="s2">&quot;default&quot;</span><span class="p">)</span>
<span class="n">persona</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;persona&quot;</span><span class="p">,</span> <span class="s2">&quot;default&quot;</span><span class="p">)</span>
<span class="n">text_splitter</span> <span class="o">=</span> <span class="n">CharacterTextSplitter</span><span class="p">(</span><span class="n">chunk_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">chunk_overlap</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="n">texts</span> <span class="o">=</span> <span class="n">text_splitter</span><span class="o">.</span><span class="n">split_documents</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="n">embeddings</span> <span class="o">=</span> <span class="n">OpenAIEmbeddings</span><span class="p">(</span><span class="n">openai_api_key</span><span class="o">=</span><span class="n">config</span><span class="p">[</span><span class="s2">&quot;openai_api_key&quot;</span><span class="p">])</span>
<span class="n">docsearch</span> <span class="o">=</span> <span class="n">Chroma</span><span class="o">.</span><span class="n">from_documents</span><span class="p">(</span><span class="n">texts</span><span class="p">,</span> <span class="n">embeddings</span><span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 4: builds the "stuff" QA chain with conversation memory and the persona/tone prompt template. -->
<!-- Whitespace inside <pre> is rendered verbatim; call arguments are indented by nesting depth. -->
<!-- The lines of the triple-quoted template are left exactly as they were, because leading spaces there would be part of the string. -->
<div class="section" id="section-4">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-4">#</a>
    </div>
    <p>Initialize the QA chain</p>
  </div>
  <div class="code">
    <div class="highlight"><pre><span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Initializing QA chain...&quot;</span><span class="p">)</span>
<span class="n">chain</span> <span class="o">=</span> <span class="n">load_qa_chain</span><span class="p">(</span>
    <span class="n">OpenAIChat</span><span class="p">(),</span>
    <span class="n">chain_type</span><span class="o">=</span><span class="s2">&quot;stuff&quot;</span><span class="p">,</span>
    <span class="n">memory</span><span class="o">=</span><span class="n">ConversationBufferMemory</span><span class="p">(</span><span class="n">memory_key</span><span class="o">=</span><span class="s2">&quot;chat_history&quot;</span><span class="p">,</span> <span class="n">input_key</span><span class="o">=</span><span class="s2">&quot;human_input&quot;</span><span class="p">),</span>
    <span class="n">prompt</span><span class="o">=</span><span class="n">PromptTemplate</span><span class="p">(</span>
        <span class="n">input_variables</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;chat_history&quot;</span><span class="p">,</span> <span class="s2">&quot;human_input&quot;</span><span class="p">,</span> <span class="s2">&quot;context&quot;</span><span class="p">,</span> <span class="s2">&quot;tone&quot;</span><span class="p">,</span> <span class="s2">&quot;persona&quot;</span><span class="p">],</span>
        <span class="n">template</span><span class="o">=</span><span class="s2">&quot;&quot;&quot;You are a chatbot who acts like </span><span class="si">{persona}</span><span class="s2">, having a conversation with a human.</span>
<span class="s2">Given the following extracted parts of a long document and a question, create a final answer only in the </span><span class="si">{tone}</span><span class="s2"> tone. Use only the sources in the document to create a response. Always quote the source in the end&quot;</span>
<span class="si">{context}</span>
<span class="si">{chat_history}</span>
<span class="s2">Human: </span><span class="si">{human_input}</span>
<span class="s2">Chatbot:&quot;&quot;&quot;</span><span class="p">,</span>
    <span class="p">),</span>
    <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 5: the "/" route (renders index.html) and the opening of the POST "/api/chat" handler. No prose column text. -->
<!-- Whitespace inside <pre> is rendered verbatim, so function and try bodies carry their Python indentation. -->
<div class="section" id="section-5">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-5">#</a>
    </div>
  </div>
  <div class="code">
    <div class="highlight"><pre><span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">&quot;/&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">index</span><span class="p">():</span>
    <span class="k">return</span> <span class="n">render_template</span><span class="p">(</span><span class="s2">&quot;index.html&quot;</span><span class="p">)</span>
<span class="nd">@app</span><span class="o">.</span><span class="n">route</span><span class="p">(</span><span class="s2">&quot;/api/chat&quot;</span><span class="p">,</span> <span class="n">methods</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;POST&quot;</span><span class="p">])</span>
<span class="k">def</span> <span class="nf">chat</span><span class="p">():</span>
    <span class="k">try</span><span class="p">:</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 6: inside the try-block of chat(): reads "question" from the JSON body and runs a similarity search on docsearch. -->
<!-- Whitespace inside <pre> is rendered verbatim; both statements sit two levels deep (def, then try). -->
<div class="section" id="section-6">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-6">#</a>
    </div>
    <p>Get the question from the request</p>
  </div>
  <div class="code">
    <div class="highlight"><pre>        <span class="n">question</span> <span class="o">=</span> <span class="n">request</span><span class="o">.</span><span class="n">json</span><span class="p">[</span><span class="s2">&quot;question&quot;</span><span class="p">]</span>
        <span class="n">documents</span> <span class="o">=</span> <span class="n">docsearch</span><span class="o">.</span><span class="n">similarity_search</span><span class="p">(</span><span class="n">question</span><span class="p">,</span> <span class="n">include_metadata</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 7: inside the try-block of chat(): calls the chain with the documents, question, tone and persona, and takes "output_text". -->
<!-- Whitespace inside <pre> is rendered verbatim; the call starts two levels deep and its arguments are indented by nesting depth. -->
<div class="section" id="section-7">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-7">#</a>
    </div>
    <p>Get the bot&rsquo;s response</p>
  </div>
  <div class="code">
    <div class="highlight"><pre>        <span class="n">response</span> <span class="o">=</span> <span class="n">chain</span><span class="p">(</span>
            <span class="p">{</span>
                <span class="s2">&quot;input_documents&quot;</span><span class="p">:</span> <span class="n">documents</span><span class="p">,</span>
                <span class="s2">&quot;human_input&quot;</span><span class="p">:</span> <span class="n">question</span><span class="p">,</span>
                <span class="s2">&quot;tone&quot;</span><span class="p">:</span> <span class="n">tone</span><span class="p">,</span>
                <span class="s2">&quot;persona&quot;</span><span class="p">:</span> <span class="n">persona</span><span class="p">,</span>
            <span class="p">},</span>
            <span class="n">return_only_outputs</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
        <span class="p">)[</span><span class="s2">&quot;output_text&quot;</span><span class="p">]</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 8: end of the try-block of chat() (returns the response as JSON) and the start of its except clause. -->
<!-- Whitespace inside <pre> is rendered verbatim; the return is two levels deep, the except clause one level. -->
<div class="section" id="section-8">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-8">#</a>
    </div>
    <p>Return the response as JSON</p>
  </div>
  <div class="code">
    <div class="highlight"><pre>        <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">&quot;response&quot;</span><span class="p">:</span> <span class="n">response</span><span class="p">})</span>
    <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span></pre></div>
  </div>
</div>
<div class="clearall"></div>
<!-- Section 9: body of the except clause of chat() (logs the error, returns a JSON error with status 500), then the __main__ guard that starts the app. -->
<!-- Whitespace inside <pre> is rendered verbatim; the except body is two levels deep, the guard is at module level. -->
<div class="section" id="section-9">
  <div class="docs">
    <div class="octowrap">
      <a class="octothorpe" href="#section-9">#</a>
    </div>
    <p>Log the error and return an error response</p>
  </div>
  <div class="code">
    <div class="highlight"><pre>        <span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Error while processing request: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">jsonify</span><span class="p">({</span><span class="s2">&quot;error&quot;</span><span class="p">:</span> <span class="s2">&quot;Unable to process the request.&quot;</span><span class="p">}),</span> <span class="mi">500</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
    <span class="n">app</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">debug</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
</pre></div>
  </div>
</div>
<div class="clearall"></div>
</div>
</body>