tracinginsights committed on
Commit
2eee6ac
·
1 Parent(s): 044109a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -2
app.py CHANGED
@@ -14,6 +14,17 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
14
 
15
 
16
  import string
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  URL = "https://www.formula1.com/content/fom-website/en/latest/all.xml"
19
 
@@ -22,6 +33,7 @@ def get_xml(url):
22
  # use urllib.parse to check for formula1.com website or other news
23
  xml = pd.read_xml(url,xpath='channel/item')
24
 
 
25
 
26
 
27
  # care taken to only consider results where there are more words not a single word quotes
@@ -127,6 +139,112 @@ def remove_punctuations(text):
127
 
128
  return modified_text
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  def check_updates(every=300):
132
  while True:
@@ -138,10 +256,11 @@ def check_updates(every=300):
138
 
139
  # loops through new articles and gets the necessary text, quotes and speakers
140
  dfs_dict = get_text(new_articles_df)
141
-
 
142
 
143
  else:
144
  print('No New article is found')
145
 
146
-
147
 
 
14
 
15
 
16
  import string
17
+ import textwrap
18
+ import tweepy
19
+ import gradio as gr
20
+
21
+
22
+
23
def image_classifier(inp):
    """Dummy classifier used to keep the Gradio interface alive.

    The input image is ignored; a fixed cat/dog score mapping is returned.
    """
    scores = {'cat': 0.3, 'dog': 0.7}
    return scores
25
+
26
# Gradio front-end for the dummy classifier above.
demo = gr.Interface(
    fn=image_classifier,
    inputs="image",
    outputs="text",
    analytics_enabled=True,
)
# NOTE(review): the basic-auth credentials are hardcoded in source —
# replace with environment-supplied values before real deployment.
demo.launch(
    max_threads=1,
    auth=("username", "password"),
    show_api=False,
)
28
 
29
  URL = "https://www.formula1.com/content/fom-website/en/latest/all.xml"
30
 
 
33
  # use urllib.parse to check for formula1.com website or other news
34
  xml = pd.read_xml(url,xpath='channel/item')
35
 
36
+ previous_xml = get_xml(URL)
37
 
38
 
39
  # care taken to only consider results where there are more words not a single word quotes
 
139
 
140
  return modified_text
141
 
142
+
143
def get_speaker_quotes(dfs_dict, question_answerer):
    """Attribute each extracted quote to a speaker using a QA model.

    Args:
        dfs_dict: mapping of article link -> dict with keys 'context'
            (article text), 'quotes' (list of quote strings) and 'speakers'
            (candidate speaker names found in the article).
        question_answerer: a question-answering pipeline; called with
            ``question=``/``context=`` kwargs, returns a dict with an
            'answer' key.

    Returns:
        list of {'speaker': ..., 'quote': ..., 'source': link} dicts, one
        per quote whose predicted speaker matches a candidate speaker.
    """
    speaker_quote = []

    for link in tqdm(dfs_dict):
        context = dfs_dict[link]['context']
        quotes = dfs_dict[link]['quotes']
        potential_speakers = dfs_dict[link]['speakers']

        for quote in quotes:
            # max_seq_len == 384 : https://huggingface.co/deepset/roberta-base-squad2
            # Truncate long quotes so the question stays within the model's
            # sequence limit. (The original checked > 380 but sliced to
            # [:384]; use one consistent bound.)
            if len(quote) > 380:
                quote = quote[:380]

            speaker_dict = question_answerer(question=f"Who said '{quote}'?", context=context)

            speaker = speaker_dict['answer']
            if len(speaker) > 0:
                speaker = remove_punctuations(speaker_dict['answer'])

            # Only keep attributions the article itself supports; anything
            # else is discarded (the original assigned dead empty strings).
            if speaker in potential_speakers:
                speaker_quote.append({'speaker': speaker, 'quote': quote, 'source': link})

    return speaker_quote
177
+
178
+
179
# SECURITY: Twitter API credentials were previously hardcoded here and
# committed to a public repository — they must be treated as leaked and
# revoked/rotated. Read them from the environment instead so the source
# stays secret-free; unset variables fall back to empty strings.
import os

api_key = os.environ.get("TWITTER_API_KEY", "")
secret_api_key = os.environ.get("TWITTER_SECRET_API_KEY", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
secret_access_token = os.environ.get("TWITTER_SECRET_ACCESS_TOKEN", "")
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN", "")
185
+
186
def post_to_twitter():
    """Build an authenticated Twitter client from the module-level credentials.

    Returns:
        A tweepy.Client configured to block-and-wait when rate limited.
    """
    # The credentials are module globals; no need to re-alias them locally.
    return tweepy.Client(
        bearer_token=bearer_token,
        consumer_key=api_key,
        consumer_secret=secret_api_key,
        access_token=access_token,
        access_token_secret=secret_access_token,
        wait_on_rate_limit=True,
    )
200
+
201
+
202
+
203
def split_near_space(string, max_length):
    """Wrap *string* into pieces no longer than *max_length* characters.

    Breaks only at whitespace (delegates to textwrap.wrap); an empty or
    whitespace-only string yields an empty list.
    """
    return textwrap.wrap(string, width=max_length)
207
+
208
def send_tweets(speaker_quote):
    """Tweet each speaker/quote pair, replying with the source link.

    Pairs that fit in a single tweet are posted directly; longer quotes are
    split at spaces and posted as a reply thread. Any Twitter API failure
    skips that pair and continues with the rest of the batch.

    Args:
        speaker_quote: list of {'speaker', 'quote', 'source'} dicts as
            produced by get_speaker_quotes().
    """
    # One authenticated client for the whole batch instead of one per pair.
    api = post_to_twitter()

    for pair in speaker_quote:
        speaker = pair['speaker']
        quote = pair['quote']
        source = pair['source']

        # 10 extra chars budgeted for the emoji framing and a hashtag.
        total_tweet_length = len(speaker) + len(quote) + 10

        if total_tweet_length < 280:
            try:
                first_tweet = api.create_tweet(text=f"🗣️ | {speaker}: '{quote}'")
                first_tweet_id = first_tweet.data['id']
                api.create_tweet(text=f"Source: {source}", in_reply_to_tweet_id=first_tweet_id)
            except Exception:
                # Catch Exception, not bare except: don't swallow SystemExit/
                # KeyboardInterrupt; drop this pair and keep going.
                continue
        else:
            quotes_list = split_near_space(quote, 280 - len(speaker) - 10)
            thread_id = None
            try:
                last_index = len(quotes_list) - 1
                for part_index, part in enumerate(quotes_list):
                    # Keep the speaker on the opening tweet even when the
                    # split produced a single chunk (the original dropped it
                    # because the last-chunk branch overwrote the first).
                    if part_index == 0 and part_index == last_index:
                        tweet_text = f"🗣️ | {speaker}: '{part}'"
                    elif part_index == 0:
                        tweet_text = f"🗣️ | {speaker}: '{part}...'"
                    elif part_index == last_index:
                        tweet_text = f"'...{part}'"
                    else:
                        tweet_text = f"'...{part}...'"

                    recent_tweet = api.create_tweet(text=tweet_text, in_reply_to_tweet_id=thread_id)
                    thread_id = recent_tweet.data['id']

                api.create_tweet(text=f"Source: {source}", in_reply_to_tweet_id=thread_id)
            except Exception:
                continue
248
 
249
  def check_updates(every=300):
250
  while True:
 
256
 
257
  # loops through new articles and gets the necessary text, quotes and speakers
258
  dfs_dict = get_text(new_articles_df)
259
+ speaker_quote = get_speaker_quotes(dfs_dict, question_answerer)
260
+ send_tweets(speaker_quote)
261
 
262
  else:
263
  print('No New article is found')
264
 
265
+ check_updates(300)
266