"""Bing search tool: a small Bing Web Search v7 client plus the tool endpoints built on it."""
import requests
from bs4 import BeautifulSoup
from ..tool import Tool
from enum import Enum
from typing import Tuple


# Number of search results presented per chunk (the same value as the
# top-3 cut-off applied by the search_top3 endpoint).
SEARCH_RESULT_LIST_CHUNK_SIZE = 3
# Number of characters of page text per chunk (the same value as the
# 500-character cap applied by the load_page_index endpoint).
RESULT_TARGET_PAGE_PER_TEXT_COUNT = 500


class BingAPI:
    """
    Thin client for the Bing Web Search v7 REST API plus a simple page loader.

    Attributes
    ----------
    _headers : dict
        Request headers carrying the ``Ocp-Apim-Subscription-Key``.
    _endpoint : str
        URL of the Bing Web Search v7 endpoint.
    _mkt : str
        Market code sent with every query (``"en-US"``).

    Methods
    -------
    __init__(self, subscription_key: str) -> None:
        Store the subscription key and the fixed endpoint/market settings.
    search(self, key_words: str, max_retry: int = 3):
        Query Bing and return the decoded JSON response.
    load_page(self, url: str, max_retry: int = 3) -> Tuple[bool, str]:
        Download a page and return the concatenated text of its ``<p>`` tags.
    """

    # Returned by load_page for every failure (download or parsing); the
    # wording is kept stable because it is surfaced to the tool's consumer.
    _LOAD_FAILED_MESSAGE = (
        "Timeout for loading this page, Please try to load another one or search again."
    )

    def __init__(self, subscription_key: str) -> None:
        """
        Initialize the client with the given subscription key.

        Parameters
        ----------
        subscription_key : str
            The subscription key to use for the Bing API.
        """
        self._headers = {"Ocp-Apim-Subscription-Key": subscription_key}
        self._endpoint = "https://api.bing.microsoft.com/v7.0/search"
        self._mkt = "en-US"

    def search(self, key_words: str, max_retry: int = 3):
        """
        Query the Bing Web Search API, retrying on failure.

        Parameters
        ----------
        key_words : str
            The query string sent as the ``q`` parameter.
        max_retry : int
            Maximum number of attempts.

        Returns
        -------
        The JSON-decoded body of the first HTTP 200 response.

        Raises
        ------
        RuntimeError
            If no attempt produced an HTTP 200 response. When the final
            attempt failed with an exception, it is attached as the cause.
        """
        last_error = None
        for _ in range(max_retry):
            try:
                response = requests.get(
                    self._endpoint,
                    headers=self._headers,
                    params={"q": key_words, "mkt": self._mkt},
                    timeout=10,
                )
            except Exception as err:
                # Best-effort: any failure while sending the request is
                # retried, but remembered so the final error can report it.
                last_error = err
                continue

            last_error = None
            if response.status_code == 200:
                return response.json()
            # Non-200 status: fall through and retry.
        raise RuntimeError("Failed to access Bing Search API.") from last_error

    def load_page(self, url: str, max_retry: int = 3) -> Tuple[bool, str]:
        """
        Download ``url`` and extract the text of its ``<p>`` elements.

        Parameters
        ----------
        url : str
            Address of the page to fetch.
        max_retry : int
            Maximum number of download attempts.

        Returns
        -------
        Tuple[bool, str]
            ``(True, text)`` on success, where ``text`` is the stripped text
            of every ``<p>`` element concatenated without a separator;
            ``(False, message)`` when the page could not be downloaded with
            an HTTP 200 status or could not be parsed.
        """
        # Stays None unless an attempt succeeds, so max_retry <= 0 (no
        # attempts at all) is reported as a failure instead of crashing.
        content = None
        for _ in range(max_retry):
            try:
                res = requests.get(url, timeout=15)
                if res.status_code != 200:
                    # Treated like any other failed attempt.
                    continue
                # Let the body's detected encoding drive decoding rather
                # than the (often missing or wrong) HTTP header charset.
                res.encoding = res.apparent_encoding
                content = res.text
            except Exception:
                # failed, retry
                continue
            break

        if content is None:
            return False, self._LOAD_FAILED_MESSAGE

        try:
            soup = BeautifulSoup(content, "html.parser")
            page_detail = "".join(
                p.get_text().strip() for p in soup.find_all("p")
            )
        except Exception:
            return False, self._LOAD_FAILED_MESSAGE
        return True, page_detail


class CONTENT_TYPE(Enum):
    """Tag describing what kind of payload a ContentItem carries."""

    # Raw response returned by the Bing search call.
    SEARCH_RESULT = 0
    # Presumably the content of a page opened from a search result; not
    # assigned anywhere in the visible part of this file -- confirm with users.
    RESULT_TARGET_PAGE = 1


class ContentItem:
    """A payload stored in the session history together with its type tag."""

    def __init__(self, type: CONTENT_TYPE, data):
        """
        Parameters
        ----------
        type : CONTENT_TYPE
            Kind of payload held in ``data``.
        data
            The payload itself; for SEARCH_RESULT items this is the search
            response whose ``["webPages"]["value"]`` list is read later.
        """
        self.type = type
        self.data = data


class DigestData:
    """Annotation-only record for one digest entry.

    Only the field types are declared here; no values are assigned and the
    class is not referenced in the visible part of this file.
    """

    # Presumably the entry's headline -- TODO confirm with the code that fills it.
    title: str
    # Presumably a short description of the entry -- TODO confirm.
    desc: str
    # Presumably the index of the chunk the entry refers to -- TODO confirm.
    chunkIndex: int


class Digest:
    """Annotation-only container for digest entries.

    Only the field types are declared here; no values are assigned and the
    class is not referenced in the visible part of this file.
    """

    # Presumably a list of DigestData items -- TODO confirm element type.
    datas: list
    # Presumably marks whether this digest has been reviewed -- TODO confirm.
    checked: bool


class SessionData:
    """Mutable state of one search session.

    Attributes
    ----------
    topic
        Current topic, ``None`` until set.
    content : list
        History of ContentItem objects; the search helper resets it and
        appends the latest search response.
    digests : list
        Collected digests.
    curResultChunk : int
        Index of the current search-result chunk.
    curTargetPageResultChunk : int
        Index of the current chunk of the loaded page text.
    """

    # Immutable defaults are kept on the class so class-level reads of these
    # names keep working exactly as before.
    topic = None
    curResultChunk = 0
    curTargetPageResultChunk = 0

    def __init__(self) -> None:
        # The lists are created per instance: as class attributes they were
        # a single object shared (and mutated) by every SessionData.
        self.content = []
        self.digests = []


# Module-level session state. The helpers defined inside build_tool bind this
# object as their default ``data`` argument, so every tool built from this
# module reads and writes the same instance.
data = SessionData()


def build_tool(config) -> Tool:
    """Build the ``Bing_search`` tool and register its endpoints.

    Parameters
    ----------
    config : mapping
        When ``config["debug"]`` is present and truthy, ``config["bing_api"]``
        is used as the search backend as-is; otherwise a BingAPI is created
        from ``config["subscription_key"]``.

    Returns
    -------
    Tool
        The tool with ``/search_top3`` and ``/load_page_index`` registered.
    """
    tool = Tool(
        "Bing_search",
        "Bing_search",
        name_for_model="Bing_search",
        name_for_human="Bing_search",
        description_for_model="""Perform Search on Bing Search engine.
Use search_top3(key: str) to get top 3 search results after input the key to search.
Use load_page_index(idx: int) to load the detailed page of the search result.""",
        description_for_human="Bing search API for browsing the internet and search for results.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="[email protected]",
        legal_info_url="[email protected]",
    )

    if "debug" in config and config["debug"]:
        bing_api = config["bing_api"]
    else:
        bing_api = BingAPI(config["subscription_key"])

    def _latest_web_results(session) -> list:
        """Return the result list of the most recent search, or [] if none."""
        if not session.content:
            # No search has been performed yet.
            return []
        try:
            return session.content[-1].data["webPages"]["value"]
        except (KeyError, TypeError):
            # The response carries no web page section (e.g. no hits).
            return []

    @tool.get("/search_top3")
    def search_top3(key_words: str) -> str:
        """Search key words, return top 3 search results."""
        top3 = search_all(key_words)[:3]
        # Page numbers are 1-based; load_page_index expects the same numbers.
        return "".join(
            f"page: {rank}\ntitle: {item['name']}\nsummary: {item['snippet']}\n"
            for rank, item in enumerate(top3, start=1)
        )

    def search_all(key_words: str, data: SessionData = data) -> list:
        """Search key_words and return the list of web page results.

        Keyword arguments:
        key_words -- key words want to search
        data -- session whose history is replaced by this search's response
        """
        result = bing_api.search(key_words)
        # Each search starts a fresh history holding only its own response.
        data.content = [ContentItem(CONTENT_TYPE.SEARCH_RESULT, result)]
        data.curResultChunk = 0
        return _latest_web_results(data)

    @tool.get("/load_page_index")
    def load_page_index(idx: str) -> str:
        """Load page detail of the search result indexed as 'idx', and return the content of the page."""
        try:
            # Callers use the 1-based page numbers printed by search_top3.
            position = int(idx) - 1
        except (TypeError, ValueError):
            return "Invalid page index, please use the page number shown in the search results."
        _, text = load_page(position)
        # Only the first chunk of the page text is returned.
        return text[:RESULT_TARGET_PAGE_PER_TEXT_COUNT]

    def load_page(idx: int, data: SessionData = data):
        """Return ``(url, text)`` for the 0-based result ``idx`` of the last search.

        On any failure the first element is ``" "`` and the second is a
        message explaining what to do next.
        """
        top = _latest_web_results(data)
        # Reject negative positions explicitly: Python would otherwise wrap
        # them around and silently load a different result.
        if not 0 <= idx < len(top):
            return (
                " ",
                "No search result with this page index, Please search first or choose one of the listed pages.",
            )
        url = top[idx]["url"]
        ok, content = bing_api.load_page(url)
        if ok:
            return url, content
        return (
            " ",
            "Timeout for loading this page, Please try to load another one or search again.",
        )

    return tool