Joshua Lansford committed on
Commit
a880e5e
·
1 Parent(s): fbbf27f

Stop runaway

Browse files
Files changed (1) hide show
  1. transmorgrify.py +7 -2
transmorgrify.py CHANGED
@@ -371,7 +371,7 @@ def _train_catboost( X, y, iterations, device, verbose, model_piece, learning_ra
371
  train_pool = Pool(
372
  data=X,
373
  label=y,
374
- cat_features=[i for i,x in enumerate(X.keys()) if len(x) == 2] #all cat keys are length 2
375
  )
376
  validation_pool = None #Can't use validation pool because it randomly has chars not in training.
377
  model = CatBoostClassifier(
@@ -453,6 +453,11 @@ def _do_reconstruct( action_model, char_model, text, num_pre_context_chars, num_
453
  #run the model
454
  action_model_result = action_model.predict( context_as_pd )[0][0]
455
 
 
 
 
 
 
456
  if action_model_result == START:
457
  pass
458
  elif action_model_result == INSERT_TO:
@@ -583,7 +588,7 @@ def main():
583
  parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
584
  parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
585
  parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=2000 )
586
- parser.add_argument('-d', '--device', help='Which device, i.e. if useing GPU', default='cpu' )
587
  parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
588
  parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
589
  parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
 
371
  train_pool = Pool(
372
  data=X,
373
  label=y,
374
+ cat_features=[i for i,x in enumerate(X.keys()) if x[0] in ['f','t','u']] #watchout if another field is added that it doesn't start with one of these.
375
  )
376
  validation_pool = None #Can't use validation pool because it randomly has chars not in training.
377
  model = CatBoostClassifier(
 
453
  #run the model
454
  action_model_result = action_model.predict( context_as_pd )[0][0]
455
 
456
+ #Stop runaway. If we have added more chars than our context, nothing is going to change.
457
+ if action_model_result == INSERT_TO and continuous_added >= num_post_context_chars:
458
+ #I could set this to MATCH or DELETE_FROM, but it is already a wreck; let's just see what happens with this.
459
+ action_model_result = MATCH
460
+
461
  if action_model_result == START:
462
  pass
463
  elif action_model_result == INSERT_TO:
 
588
  parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
589
  parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
590
  parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=2000 )
591
+ parser.add_argument('-d', '--device', help='Which device, i.e. if using GPU', default='cpu' )
592
  parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
593
  parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
594
  parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )