madhavkotecha committed on
Commit
59d8c9f
·
verified ·
1 Parent(s): ce62bb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -94,12 +94,12 @@ class NEI:
94
  # plt.colorbar()
95
  # plt.savefig('Confusion_Matrix.png')
96
 
97
- def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0, prev_token=None):
98
  is_titlecase = 1 if w[0].isupper() else 0
99
  is_allcaps = 1 if w.isupper() else 0
100
  is_sw = 1 if w.lower() in SW else 0
101
  is_punct = 1 if w in PUNCT else 0
102
- is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
103
  is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
104
  # is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
105
  # is_start_of_sentence = 1 if scaled_position == 0 else 0
@@ -113,7 +113,7 @@ class NEI:
113
  for i, token in enumerate(tokens):
114
  prev_tag = tags[i - 1] if i > 0 else 0
115
  next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
116
- x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag, prev_token=tokens[i-1] if i > 0 else None)
117
  y = 1 if tags[i] > 0 else 0
118
  features.append(x)
119
  labels.append(y)
@@ -140,7 +140,7 @@ class NEI:
140
  tokens = word_tokenize(sentence)
141
  features = []
142
 
143
- raw_features = [self.vectorize(token, i / len(tokens), prev_token=tokens[i-1] if i > 0 else None) for i, token in enumerate(tokens)]
144
  raw_features = np.array(raw_features, dtype=np.float32)
145
  scaled_features = self.scaler.transform(raw_features)
146
  y_pred = self.model.predict(scaled_features)
@@ -149,7 +149,7 @@ class NEI:
149
  prev_tag = y_pred[i - 1] if i > 0 else 0
150
  next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
151
 
152
- feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag, prev_token=tokens[i-1] if i > 0 else None)
153
  features.append(feature_with_context)
154
 
155
  features = np.array(features, dtype=np.float32)
 
94
  # plt.colorbar()
95
  # plt.savefig('Confusion_Matrix.png')
96
 
97
+ def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0):
98
  is_titlecase = 1 if w[0].isupper() else 0
99
  is_allcaps = 1 if w.isupper() else 0
100
  is_sw = 1 if w.lower() in SW else 0
101
  is_punct = 1 if w in PUNCT else 0
102
+ # is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
103
  is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
104
  # is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
105
  # is_start_of_sentence = 1 if scaled_position == 0 else 0
 
113
  for i, token in enumerate(tokens):
114
  prev_tag = tags[i - 1] if i > 0 else 0
115
  next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
116
+ x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag)
117
  y = 1 if tags[i] > 0 else 0
118
  features.append(x)
119
  labels.append(y)
 
140
  tokens = word_tokenize(sentence)
141
  features = []
142
 
143
+ raw_features = [self.vectorize(token, i / len(tokens)) for i, token in enumerate(tokens)]
144
  raw_features = np.array(raw_features, dtype=np.float32)
145
  scaled_features = self.scaler.transform(raw_features)
146
  y_pred = self.model.predict(scaled_features)
 
149
  prev_tag = y_pred[i - 1] if i > 0 else 0
150
  next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
151
 
152
+ feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag)
153
  features.append(feature_with_context)
154
 
155
  features = np.array(features, dtype=np.float32)