Spaces:
Runtime error
Runtime error
File size: 6,313 Bytes
e67043b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import requests
from bs4 import BeautifulSoup
from ..tool import Tool
from enum import Enum
from typing import Tuple
# Size of one chunk of the search result list.
SEARCH_RESULT_LIST_CHUNK_SIZE = 3
# Length of one text chunk of a result's target page content.
RESULT_TARGET_PAGE_PER_TEXT_COUNT = 500
class BingAPI:
    """
    A small client for the Bing Web Search API plus a plain-text page loader.

    Attributes
    ----------
    _headers : dict
        HTTP headers sent with every search request; carries the
        subscription key.
    _endpoint : str
        URL of the search endpoint that is queried.
    _mkt : str
        Value sent as the ``mkt`` query parameter.

    Methods
    -------
    search(self, key_words: str, max_retry: int = 3):
        Query the search endpoint and return the decoded JSON response.
    load_page(self, url: str, max_retry: int = 3) -> Tuple[bool, str]:
        Download a page and return the joined text of its ``<p>`` elements.
    """

    # Returned by load_page whenever the page could not be fetched or parsed.
    _LOAD_FAILED_MESSAGE = (
        "Timeout for loading this page, Please try to load another one or search again."
    )

    def __init__(self, subscription_key: str) -> None:
        """
        Initialize the client with the given subscription key.

        Parameters
        ----------
        subscription_key : str
            The subscription key to use for the Bing API.
        """
        self._headers = {"Ocp-Apim-Subscription-Key": subscription_key}
        self._endpoint = "https://api.bing.microsoft.com/v7.0/search"
        self._mkt = "en-US"

    def search(self, key_words: str, max_retry: int = 3):
        """
        Send ``key_words`` to the search endpoint, retrying on failure.

        Parameters
        ----------
        key_words : str
            The query string, sent as the ``q`` parameter.
        max_retry : int
            Maximum number of attempts.

        Returns
        -------
        The JSON-decoded body of the first response with status code 200.

        Raises
        ------
        RuntimeError
            If no attempt produced a 200 response (also when
            ``max_retry`` is not positive).
        """
        for _ in range(max_retry):
            try:
                response = requests.get(
                    self._endpoint,
                    headers=self._headers,
                    params={"q": key_words, "mkt": self._mkt},
                    timeout=10,
                )
            except Exception:
                # Deliberately best-effort: a failed request only costs one attempt.
                continue
            if response.status_code == 200:
                return response.json()
            # Any other status code: fall through and retry.
        raise RuntimeError("Failed to access Bing Search API.")

    def load_page(self, url: str, max_retry: int = 3) -> Tuple[bool, str]:
        """
        Download ``url`` and extract the text of all its ``<p>`` elements.

        Parameters
        ----------
        url : str
            Address of the page to download.
        max_retry : int
            Maximum number of download attempts.

        Returns
        -------
        Tuple[bool, str]
            ``(True, text)`` on success, where ``text`` is the stripped text
            of every paragraph joined without a separator; otherwise
            ``(False, message)`` with a fixed failure message.
        """
        # Stays None unless one attempt succeeds, so a non-positive max_retry
        # reports a failure instead of hitting an unbound local variable.
        content = None
        for _ in range(max_retry):
            try:
                res = requests.get(url, timeout=15)
            except Exception:
                # Deliberately best-effort: a failed request only costs one attempt.
                continue
            if res.status_code != 200:
                continue
            # Use the encoding detected from the body rather than the declared one.
            res.encoding = res.apparent_encoding
            content = res.text
            break

        if content is None:
            return False, self._LOAD_FAILED_MESSAGE

        try:
            soup = BeautifulSoup(content, "html.parser")
            page_detail = "".join(
                paragraph.get_text().strip() for paragraph in soup.find_all("p")
            )
        except Exception:
            # Parsing problems are reported with the same message as download failures.
            return False, self._LOAD_FAILED_MESSAGE
        return True, page_detail
class CONTENT_TYPE(Enum):
    """Tags for the kind of payload held by a content item."""

    # Tag attached to the raw response of a search request.
    SEARCH_RESULT = 0
    # NOTE(review): presumably tags the content of a loaded result page;
    # not referenced elsewhere in the visible code.
    RESULT_TARGET_PAGE = 1
class ContentItem:
    """A payload paired with the CONTENT_TYPE tag describing what it is."""

    def __init__(self, type: CONTENT_TYPE, data):
        # `type` shadows the builtin, but it is the caller-visible parameter
        # and attribute name, so it is kept as is.
        self.type, self.data = type, data
class DigestData:
    """Declares the fields of one digest entry (annotations only, no values)."""

    # NOTE(review): field meanings are inferred from their names; the class
    # is not used in the visible code.
    title: str
    desc: str
    chunkIndex: int
class Digest:
    """Declares the fields of a digest (annotations only, no values)."""

    # NOTE(review): presumably `datas` holds DigestData entries; the class is
    # not used in the visible code, so confirm against its callers.
    datas: list
    checked: bool
class SessionData:
    """
    Mutable state of one search session.

    Attributes
    ----------
    topic
        Defaults to ``None``.
    content : list
        Content items recorded during the session; a fresh list per instance.
    digests : list
        A fresh list per instance.
    curResultChunk : int
        Defaults to ``0``.
    curTargetPageResultChunk : int
        Defaults to ``0``.
    """

    # Immutable defaults can safely live on the class.
    topic = None
    curResultChunk = 0
    curTargetPageResultChunk = 0

    def __init__(self) -> None:
        # Lists are created per instance: as class attributes they would be
        # shared by every SessionData object, so appending through one
        # session would leak into all the others.
        self.content = []
        self.digests = []
# Module-level session state; used as the default `data` argument of the
# search_all and load_page helpers defined inside build_tool.
data = SessionData()
def build_tool(config) -> Tool:
    """
    Build the ``Bing_search`` tool and register its two GET endpoints.

    Parameters
    ----------
    config
        Mapping holding either a truthy ``"debug"`` entry together with a
        ready-made ``"bing_api"`` object, or a ``"subscription_key"`` from
        which a BingAPI instance is created.

    Returns
    -------
    Tool
        The tool with ``/search_top3`` and ``/load_page_index`` registered.
    """
    tool = Tool(
        "Bing_search",
        "Bing_search",
        name_for_model="Bing_search",
        name_for_human="Bing_search",
        description_for_model="""Perform Search on Bing Search engine.
Use search_top3(key: str) to get top 3 search results after input the key to search.
Use load_page_index(idx: int) to load the detailed page of the search result.""",
        description_for_human="Bing search API for browsing the internet and search for results.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="[email protected]",
        legal_info_url="[email protected]",
    )

    if "debug" in config and config["debug"]:
        # Debug mode: the caller injects the API object to use.
        bing_api = config["bing_api"]
    else:
        bing_api = BingAPI(config["subscription_key"])

    load_failed_message = (
        "Timeout for loading this page, Please try to load another one or search again."
    )
    invalid_index_message = (
        "Invalid page index, Please search first and use a page index from the search results."
    )
    no_result_message = "No search results found, Please try other key words."

    def _web_pages(search_result) -> list:
        """Return the web page entries of a search response, or [] when it has none."""
        try:
            return search_result["webPages"]["value"]
        except (KeyError, TypeError):
            # A response without web page results simply yields no entries.
            return []

    def search_all(key_words: str, data: SessionData = data) -> list:
        """Search key_words and return the list of web page result entries.

        The raw response replaces the session content, so later page loads
        refer to this search.

        Keyword arguments:
        key_words -- key words want to search
        data -- session state to update (defaults to the module-level session)
        """
        result = bing_api.search(key_words)
        data.content = [ContentItem(CONTENT_TYPE.SEARCH_RESULT, result)]
        data.curResultChunk = 0
        return _web_pages(result)

    def load_page(idx: int, data: SessionData = data):
        """Load the page of the zero-based result ``idx`` of the latest search.

        Returns a ``(url, text)`` pair; on any failure the url is ``" "`` and
        the text is a message describing the problem.
        """
        if not data.content:
            # Nothing has been searched yet, so there is no result to load.
            return " ", invalid_index_message
        pages = _web_pages(data.content[-1].data)
        # Reject negative indexes explicitly: they would otherwise silently
        # select a result counted from the end of the list.
        if not 0 <= idx < len(pages):
            return " ", invalid_index_message
        url = pages[idx]["url"]
        ok, content = bing_api.load_page(url)
        if ok:
            return url, content
        return " ", load_failed_message

    @tool.get("/search_top3")
    def search_top3(key_words: str) -> str:
        """Search key words, return top 3 search results."""
        top3 = search_all(key_words)[:3]
        if not top3:
            return no_result_message
        return "".join(
            f"page: {position}\ntitle: {item['name']}\nsummary: {item['snippet']}\n"
            for position, item in enumerate(top3, start=1)
        )

    @tool.get("/load_page_index")
    def load_page_index(idx: str) -> str:
        """Load page detail of the search result indexed as 'idx', and return the content of the page."""
        try:
            page_number = int(idx)
        except (TypeError, ValueError):
            return invalid_index_message
        # Page numbers shown to the caller are one-based.
        _, text = load_page(page_number - 1)
        # Slicing already copes with texts shorter than the limit.
        return text[:RESULT_TARGET_PAGE_PER_TEXT_COUNT]

    return tool
|