AliMustapha committed on
Commit 1ffb898 · 1 Parent(s): 46236ad

code cleaning

Dictionary_guesser/dsutil.py CHANGED
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 
- __copyright__ = "Copyright (C) 2022 Davide Rossi"
- __license__ = "GPL-3.0-or-later"
 
 import pandas as pd
 import numpy as np
Dictionary_guesser/name_maker.py CHANGED
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 
- __copyright__ = "Copyright (C) 2022 Davide Rossi"
- __license__ = "GPL-3.0-or-later"
 
 import pandas as pd
 import numpy as np
Dictionary_guesser/name_nation_guesser.py CHANGED
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 
- __copyright__ = "Copyright (C) 2022 Davide Rossi"
- __license__ = "GPL-3.0-or-later"
+
 
 import pandas as pd
 import numpy as np
dt.py ADDED
@@ -0,0 +1,386 @@
+ # from Dictionary_guesser.name_nation_guesser import NameNationGuesser
+ # import datetime
+ # from GitScraping import CommitInfo
+ # if __name__ == "__main__":
+
+ # guesser =NameNationGuesser(names_filename="Dictionary_guesser/names.csv",places_filename='Dictionary_guesser/places.tab', guess_first_second_min_mag=None,place_column_name="sub-region")
+ # commit_info=CommitInfo("https://github.com/AhmadM-DL/On-Learning-Implicit-Protected-Attributes")
+ # df,first_commit_dates = commit_info.get_first_commit_dates()
+
+ # def guess_zone(name, epoch, offset):
+ #     dt = datetime.datetime.fromtimestamp(epoch)
+ #     country_pop_map = guesser.country_pop_from_datetime(dt, offset)
+ #     # print(country_pop_map)
+ #     return guesser.guess_zone(name, country_pop_map=country_pop_map)
+ # first_commit_dates['Commit_Seconds'] = first_commit_dates['First_Commit_Date'].apply(lambda x: x.timestamp())
+ # first_commit_dates['Author_Timezone'] = first_commit_dates['Author_Timezone'] /60
+
+
+ # first_commit_dates['region_Dictionary'] = first_commit_dates.apply(lambda row: guess_zone(row['Author'],row['Commit_Seconds'], row['Author_Timezone']), axis=1)
+ # print(first_commit_dates)
+
+ from google.cloud import storage
+ import json
+ import os
+ import pandas as pd
+ # # Initialize a client
+ # jsonApi = os.getenv('apiKey')
+ # print(jsonApi)
+
+ # export jsonApi='{
+ # "type": "service_account",
+ # "project_id": "kinetic-guild-369323",
+ # "private_key_id": "b06a3ad76990da0e6970c072e95e7d26bb2e8c1d",
+ # "private_key": "-----BEGIN PRIVATE KEY-----\n[redacted]\n-----END PRIVATE KEY-----\n",
+ # "client_email": "geogenderali@kinetic-guild-369323.iam.gserviceaccount.com",
+ # "client_id": "102736498211031284416",
+ # "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+ # "token_uri": "https://oauth2.googleapis.com/token",
+ # "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+ # "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/geogenderali%40kinetic-guild-369323.iam.gserviceaccount.com",
+ # "universe_domain": "googleapis.com"
+ # }'
+ #
+ # os.environ['apiKey'] = json_string
+ #
+
+ jsonApi = os.getenv('jsonApi')
+ bucket_name = os.getenv('bucket_name')
+ file_name = os.getenv('file_name')
+ print(file_name)
+ print(bucket_name)
+ print(jsonApi)
+ service_account_info = json.loads(jsonApi)
+ client = storage.Client.from_service_account_info(service_account_info)
+
+ blob = client.get_bucket(bucket_name).blob(file_name)
+
+ with blob.open("r") as file:
+     df = pd.read_csv(file, sep="\t")
+
+ print(df.head())
+ # Now df contains the data from the CSV file
+ print(df.head())
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #!/usr/bin/env python3
+
+ __copyright__ = "Copyright (C) 2022 Davide Rossi"
+ __license__ = "GPL-3.0-or-later"
+
+ import pandas as pd
+ import numpy as np
+ import re
+ import random
+ import pytz
+ import datetime
+ import time
+ import regex
+ import click
+ import code
+ import logging
+ from enum import Enum
+ import csv
+ from unidecode import unidecode
+ from collections import defaultdict
+
+
+ class Algorithms(str,Enum):
+     AVG = 'avg'
+     PROD = 'prod'
+ class NameNationGuesser:
+     WORD_RE = re.compile(r'\W+')
+     CAMEL_RE = re.compile(r'([A-Z][a-z]+)')
+     UPPER_RE = re.compile(r'([A-Z]+)')
+     UNDER_RE = re.compile(r'(_)')
+     LEADING_BLANKS_RE = re.compile(r'^\s*')
+     DEFAULT_PLACES_FILENAME = 'places.tab'
+     DEFAULT_NAMES_FILENAME = 'names_codes.tab'
+     DEFAULT_GUESS_FIRST_SECOND_MIN_MAG = None
+     DEFAULT_ALGORITHM = Algorithms.AVG
+     DEFAULT_COLUMN_NAME="zone"
+
+     def __init__(self, places_filename=DEFAULT_PLACES_FILENAME, names_filename=DEFAULT_NAMES_FILENAME, algorithm=DEFAULT_ALGORITHM, guess_first_second_min_mag=DEFAULT_GUESS_FIRST_SECOND_MIN_MAG,place_column_name=DEFAULT_COLUMN_NAME):
+         self.zone_by_place = {}
+         self.pop_by_place = {}
+         self.min_freq_by_place = {}
+         self.cumsum_population_by_code = {}
+         self.name_rows_by_name = defaultdict(list)
+         self.all_timezones = None
+         self.places_data = None
+         self.names_data = None
+         self.names_data_empty = None
+         self.names_data_col_names = ['name', 'type', 'code', 'frequency', 'gender']
+         self.names_data_dtype = {'frequency': float}
+         self.names_data_by_name = None
+         self.guess_first_second_min_mag = guess_first_second_min_mag
+         self.algorithm = algorithm
+         self.place_column_name=place_column_name
+         self.places_data = pd.read_csv(places_filename, sep='\t', header=0, keep_default_na=False, na_values='',
+                                        names=['country', 'state_name', 'region', 'un_subregion', 'zone', 'timezone', 'population', 'sovereignty_numeric', 'sovereignty', 'code', 'code3', 'code_num', 'cctdl',"_",'sub-region'],
+                                        dtype={'population':int, 'sovereignty_numeric':int, 'code_num':int})
+
+         self.names_data = self.__read_names_data(names_filename)
+         self.names_data_empty = pd.DataFrame().reindex(columns=self.names_data.columns)
+
+     @classmethod
+     def advanced_splitter(cls, seq):
+         """Split words separated by spaces or using CamelNotation"""
+         return cls.WORD_RE.split(cls.LEADING_BLANKS_RE.sub(r'', cls.CAMEL_RE.sub(r' \1', cls.UPPER_RE.sub(r' \1', cls.UNDER_RE.sub(r' ', seq)))))
+
+     def __get_cumsum_population(self, code):
+         if code not in self.cumsum_population_by_code:
+             df = self.places_data
+             #create a data frame for a specific code with an ordered cumsum population
+             df_code = df[(df['code'] == code) & (df['population'] != 0)][['timezone', 'population']]
+             population = df_code['population'].sum()
+             df_code = df_code.sort_values('population')
+             df_code['population'] = df_code['population'].cumsum()
+             self.cumsum_population_by_code[code] = (population, df_code.copy().reset_index(drop=True))
+         return self.cumsum_population_by_code[code]
+
+     def compatible_datetime_offset(self, code):
+         #extract a random timezone, with a chance proportional to the population of the people in that timezone
+         population, df_code = self.__get_cumsum_population(code)
+         timezone_name = df_code[df_code['population'] >= random.randrange(population)].iloc[0]['timezone']
+         valid_time = False
+         while not valid_time:
+             valid_time = True
+             #create a random datetime from 1/1/1970 to now
+             current_epoch = time.time()
+             epoch = random.uniform(0, current_epoch)
+             dt = datetime.datetime.fromtimestamp(epoch)
+             #localize the datetime using the timezone and calculate its UTC offset
+             timezone = pytz.timezone(timezone_name)
+             try: #it may not work because of an ambiguous or nonexistent time for that timezone on that date
+                 offset = int(timezone.utcoffset(dt).total_seconds()/60)
+             except:
+                 valid_time = False
+         return dt, offset
+     def is_roman_language(self,text):
+         roman_pattern = r'^\p{Latin}+$'
+         match = regex.match(roman_pattern, text, flags=regex.UNICODE)
+         return match is not None
+
+     def text_to_romanize(self,text):
+         text=str(text)
+         translator = str.maketrans(r"-._\/+", " " * 6)
+         text= text.translate(translator)
+         if not self.is_roman_language(text):
+             return unidecode(text)
+         else :
+             return text
+     def __read_names_data(self, names_filename):
+         self.names_data_by_name = defaultdict(list)
+         names = self.names_data_col_names
+         name_pos = names.index('name')
+         dtype = self.names_data_dtype
+         rows = []
+         with open(names_filename, "r") as file:
+             reader = csv.reader(file, delimiter='\t')
+             next(reader)
+             for row in reader:
+                 name = row[name_pos].lower()
+                 row[name_pos] = name
+                 rows.append(row)
+                 self.name_rows_by_name[name].append(row)
+         names_data = pd.DataFrame(rows)
+         names_data.columns = names
+         names_data = names_data.astype(dtype)
+
+         return names_data
+
+     def place_population(self, code):
+         if code not in self.pop_by_place:
+             self.pop_by_place[code] = self.places_data[self.places_data.code == code].population.sum()
+         return self.pop_by_place[code]
+
+     def min_frequency(self, code):
+         if code not in self.min_freq_by_place:
+             # min_freq_dict[code] = names_data[names_data.code == code]['frequency'].min()
+             # self.min_freq_by_place[code] = self.names_data['frequency'].min()
+             self.min_freq_by_place[code] = self.names_data[(self.names_data['code'] == code) & (self.names_data['frequency'] > 0)]['frequency'].min()
+         return self.min_freq_by_place[code]
+
+     def name_data_for_name(self, name):
+         if name in self.names_data_by_name:
+             return self.names_data_by_name[name]
+         elif name in self.name_rows_by_name:
+             self.names_data_by_name[name] = pd.DataFrame(self.name_rows_by_name[name])
+             self.names_data_by_name[name].columns = self.names_data_col_names
+             self.names_data_by_name[name] = self.names_data_by_name[name].astype(self.names_data_dtype)
+             return self.names_data_by_name[name]
+         else:
+             return self.names_data_empty
+         # return names_data_by_name[name] if name in names_data_by_name else names_data_empty #that is deadly slow, it's better to create a new data frame for each name
+
+     def get_all_timezones(self):
+         df = self.places_data
+         if self.all_timezones is None:
+             self.all_timezones = list(df[(df['timezone'].notnull()) & (df['population'] > 0)]['timezone'].unique())
+         return self.all_timezones
+
+     def country_pop_from_datetime(self, dt, offset):
+         df = self.places_data
+         places_pop = {}
+         for tz in self.get_all_timezones():
+             timezone = pytz.timezone(tz)
+             try:
+                 timezone_offset = timezone.utcoffset(dt).total_seconds() // 60
+             except pytz.exceptions.AmbiguousTimeError:
+                 timezone_offset = timezone.utcoffset(dt, is_dst=True).total_seconds() // 60
+             except pytz.exceptions.NonExistentTimeError:
+                 timezone_offset = None
+             if timezone_offset == offset:
+                 df_tz_pop = df[df['timezone'] == tz].iloc[0]
+                 population = df_tz_pop['population']
+                 code = df_tz_pop['code']
+                 places_pop[code] = population
+         return places_pop
+
+     def score_a_name_part(self, name, countries=None, country_pop_map=None):
+         if countries is not None and country_pop_map is not None:
+             raise ValueError(f'At least one of countries and country_pop_map must be None')
+         name_data = self.name_data_for_name(name)
+         if country_pop_map is not None:
+             countries = list(country_pop_map.keys())
+         if countries is not None:
+             name_data = name_data[name_data.code.isin(countries)]
+         score_dict = {}
+         for code, _, frequency in zip(name_data.code, name_data.type, name_data.frequency):
+             # if not places_data[places_data.code == code].empty:
+             if code in self.places_data["code"].values:
+                 if country_pop_map is not None:
+                     population = country_pop_map[code]
+                 else:
+                     population = self.place_population(code)
+                 score = population * frequency
+                 score_dict[code] = score if not code in score_dict else score + score_dict[code]
+             else:
+                 raise LookupError(f'{code} not in places data frame')
+
+         return [(code, score) for code, score in sorted(score_dict.items(), key=lambda item: item[1], reverse=True)], score_dict
+
+     def guess_scores(self, name, countries=None, country_pop_map=None, return_dict=False):
+         if countries is not None and country_pop_map is not None:
+             raise ValueError(f'At least one of countries and country_pop_map must be None')
+         #collect scores dict for all name parts
+         score_parts = []
+         name = name.lower()
+         for name_part in NameNationGuesser.advanced_splitter(name):
+             _, score_part_dict = self.score_a_name_part(name_part, countries=countries, country_pop_map=country_pop_map)
+
+             score_parts.append(score_part_dict)
+         #identify all places in the scores
+         all_places = set()
+         for score_part in score_parts:
+             all_places = all_places.union(set(score_part.keys()))
+         parts = len(score_parts)
+         #construct a scores dict with the score for each place
+         scores_avg = {}
+         for place in all_places:
+             scores = []
+             population = self.place_population(place) #TODO: should we use the population of country_pop_map if available?
+             for score_part in score_parts:
+                 if place in score_part:
+                     scores.append(score_part[place])
+                 else:
+                     if self.algorithm == Algorithms.AVG:
+                         scores.append(0)
+                     elif self.algorithm == Algorithms.PROD:
+                         scores.append(self.min_frequency(place) * population)
+             if self.algorithm == Algorithms.AVG:
+                 score = sum(scores) / len(scores)
+             elif self.algorithm == Algorithms.PROD:
+                 score = np.prod([score/population for score in scores]) * population #each score part is already multiplied by population, this fixes that
+             else:
+                 raise ValueError(f'Unknown algorithm: {self.algorithm}')
+             scores_avg[place] = score
+         retval = [(code, score) for code, score in sorted(scores_avg.items(), key=lambda item: item[1], reverse=True)]
+         if return_dict:
+             return retval, scores_avg
+         else:
+             return retval
+
+     def guess(self, name, countries=None, country_pop_map=None):
+         if countries is not None and country_pop_map is not None:
+             raise ValueError(f'At least one of countries and country_pop_map must be None')
+         scores = self.guess_scores(name, countries=countries, country_pop_map=country_pop_map)
+         if len(scores) == 0:
+             return None
+         if len(scores) == 1 or self.guess_first_second_min_mag is None:
+             place, _ = scores[0]
+             return place
+         else:
+             place, score0 = scores[0]
+             _, score1 = scores[1]
+             if score0 >= score1 * self.guess_first_second_min_mag:
+                 return place
+             else:
+                 return None
+
+
+     def zone_scores_from_place_scores(self, score_list, return_dict=False):
+         score_dict = {}
+         for code, score in score_list:
+             zone = self.get_zone_by_place(code)
+             score_dict[zone] = score if zone not in score_dict else score + score_dict[zone]
+         retval = [(zone, score) for zone, score in sorted(score_dict.items(), key=lambda item: item[1], reverse=True)]
+
+         if return_dict:
+             return retval, score_dict
+         else:
+             return retval
+
+
+     def zone_scores(self, name, countries=None, country_pop_map=None, return_dict=False):
+         if countries is not None and country_pop_map is not None:
+             raise ValueError(f'At least one of countries and country_pop_map must be None')
+         score_list = self.guess_scores(name, countries=countries, country_pop_map=country_pop_map)
+         return self.zone_scores_from_place_scores(score_list, return_dict=return_dict)
+
+     def guess_zone(self, name, countries=None, country_pop_map=None):
+         scores = self.zone_scores(name, countries=countries, country_pop_map=country_pop_map)
+         if len(scores) == 0:
+             return None
+
+         if len(scores) == 1 or self.guess_first_second_min_mag is None:
+             place, _ = scores[0]
+             return place
+         else:
+             place, score0 = scores[0]
+             _, score1 = scores[1]
+             if score0 >= score1 * self.guess_first_second_min_mag:
+                 return place
+             else:
+                 return None
+
+
+
+     def get_zone_by_place(self, code):
+         if code in self.zone_by_place:
+             return self.zone_by_place[code]
+         places_data_code = self.places_data[self.places_data.code == code]
+         zone = places_data_code.loc[places_data_code['population'].idxmax()][self.place_column_name]
+         self.zone_by_place[code] = zone
+
+         return zone
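
For reference, the commented-out driver at the top of dt.py suggests how the NameNationGuesser class pasted into this file is meant to be exercised. The sketch below reproduces that flow as runnable code; it assumes the Dictionary_guesser data files (names.csv, places.tab) are available locally, and the epoch value and the 120-minute UTC offset are illustrative placeholders rather than values taken from this commit.

import datetime

from Dictionary_guesser.name_nation_guesser import NameNationGuesser

# Build a guesser over the bundled name/place tables (paths taken from the commented-out driver).
guesser = NameNationGuesser(names_filename="Dictionary_guesser/names.csv",
                            places_filename="Dictionary_guesser/places.tab",
                            guess_first_second_min_mag=None,
                            place_column_name="sub-region")

# Restrict candidate countries to those compatible with a commit's UTC offset (in minutes),
# then guess a zone for the author name.
dt = datetime.datetime.fromtimestamp(1660000000)              # illustrative commit timestamp
country_pop_map = guesser.country_pop_from_datetime(dt, 120)  # +02:00, i.e. 120 minutes
print(guesser.guess_zone("Davide Rossi", country_pop_map=country_pop_map))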