ifmain committed on
Commit
1ad8180
·
verified ·
1 Parent(s): 7e18f62

Upload gen.py

Browse files
Files changed (1) hide show
  1. gen.py +153 -0
gen.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ print('Load libs')
4
+ import http.server
5
+ import socketserver
6
+ import threading
7
+ import time
8
+
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, T5ForConditionalGeneration, T5Tokenizer
10
+ from collections import defaultdict
11
+ from bs4 import BeautifulSoup
12
+ from threading import Thread
13
+ import requests as rq
14
+ import random
15
+ import torch
16
+ import json
17
+ import time
18
+ import os
19
+ import re
20
+
21
+ import difflib
22
+ import logging
23
# Raise the threshold of the 'http.server' logger so only errors pass.
logging.getLogger('http.server').setLevel(logging.ERROR)

# Clear the terminal: the shell tries `cls` first and falls back to `clear`.
os.system('cls||clear')

# Constants
# Torch device string used for the model and for every encoded prompt.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
31
+
32
+
33
def load_model(model_name_or_path):
    """Load a causal language model with its tokenizer and summarise it.

    Args:
        model_name_or_path: Identifier forwarded unchanged to
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(model, tokenizer, params)`` tuple. ``model`` has been moved to
        ``DEVICE``. ``params`` is a dict with the keys ``'model'`` (the
        argument as given), ``'size'`` (parameter count in billions,
        truncated to two decimals, with a ``B`` suffix), ``'text'`` (the
        token budget later used as the generation limit) and ``'device'``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

    total_params = sum(p.numel() for p in model.parameters())

    # The input embedding table has one row per vocabulary entry, so its
    # row count is the vocabulary size, not the context length. Prefer the
    # context length advertised by the model config and keep the previous
    # value only as a fallback for configs that do not expose it.
    max_tokens = getattr(model.config, 'max_position_embeddings', None)
    if not isinstance(max_tokens, int) or max_tokens <= 0:
        max_tokens = model.get_input_embeddings().weight.shape[0]

    model = model.to(DEVICE)
    params = {
        'model': model_name_or_path,
        'size': f'{int(total_params / 10**7) / 100}B',
        'text': max_tokens,
        'device': DEVICE,
    }
    return model, tokenizer, params
44
+
45
+
46
+ def model_log(params):
47
+ model_name = f'Model: {params["model"]}'
48
+ param_size = f'Size model: {params["size"]}'
49
+ max_tokens = f'Maximum Tokens: {params["text"]}'
50
+ device_info = f'Device: {params["device"]}'
51
+ max_length = max([len(model_name), len(param_size), len(max_tokens), len(device_info)])
52
+ padding = ''.zfill(max_length + 4).replace('0', '#')
53
+ model_name = f'# {model_name}{"".zfill(max_length - len(model_name)).replace("0", " ")} #'
54
+ param_size = f'# {param_size}{"".zfill(max_length - len(param_size)).replace("0", " ")} #'
55
+ max_tokens = f'# {max_tokens}{"".zfill(max_length - len(max_tokens)).replace("0", " ")} #'
56
+ device_info = f'# {device_info}{"".zfill(max_length - len(device_info)).replace("0", " ")} #'
57
+ return f'{padding}\n{model_name}\n{param_size}\n{max_tokens}\n{device_info}\n{padding}'
58
+
59
def encode_ids(text, tokenizer):
    """Encode ``text`` with ``tokenizer`` into a PyTorch tensor on ``DEVICE``.

    Args:
        text: String to tokenize.
        tokenizer: Object exposing ``encode(text, return_tensors="pt")``.

    Returns:
        The tensor returned by ``tokenizer.encode``, moved to ``DEVICE``.
    """
    return tokenizer.encode(text, return_tensors="pt").to(DEVICE)
61
+
62
def generate_step_by_step(config, model, tokenizer,file):
    """Extend ``config['text']`` a few tokens at a time, streaming progress.

    After every step that produced no newline, the prompt plus the text
    generated so far is written to ``file`` and printed to a freshly cleared
    terminal (with ``'Me: '`` shown as ``'User: '``).

    Args:
        config: Dict with the keys ``'text'`` (prompt), ``'maxsize'`` (token
            budget), ``'do_sample'``, ``'temperature'``, ``'top_k'``,
            ``'top_p'`` and ``'num_return_sequences'``; the last five are
            forwarded to ``model.generate``.
        model: Object exposing ``generate(...)``.
        tokenizer: Object exposing ``encode``, ``decode`` and
            ``eos_token_id``.
        file: Path of the text file that receives the progress snapshots.

    Returns:
        The generated continuation up to (not including) its first newline
        as soon as one appears; otherwise the whole continuation once all
        steps have run; ``''`` when no step runs at all.
    """
    text_input = config['text']
    input_ids = encode_ids(text_input, tokenizer)
    current_length = len(input_ids[0])
    # NOTE(review): this is a remaining-token count used as a step count,
    # while each step keeps 4 tokens - confirm the intended bound.
    target = config['maxsize'] - current_length

    # Stays empty when the prompt already fills the budget (no iteration).
    generated_text = ''
    for _ in range(target):
        output = model.generate(
            input_ids,
            do_sample=config['do_sample'],
            temperature=config['temperature'],
            top_k=config['top_k'],
            top_p=config['top_p'],
            max_length=current_length + 6,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=config['num_return_sequences'],
        )
        # Up to 6 new tokens are requested, but only 4 are kept per step.
        current_length += 4
        text_output = tokenizer.decode(output[0][:current_length])
        # NOTE(review): assumes the decoded text starts with the prompt
        # verbatim, so slicing by the prompt's character length isolates
        # the continuation - confirm for the tokenizer in use.
        generated_text = text_output[len(text_input):]

        # A newline ends the reply; only the first line is returned.
        first_line, newline, _ = generated_text.partition('\n')
        if newline:
            return first_line

        snapshot = text_input + generated_text
        with open(file, 'w', encoding='utf-8') as progress_file:
            progress_file.write(snapshot)

        os.system('cls||clear')
        print(snapshot.replace('Me: ', 'User: '))

        # Feed the accumulated text back in as the next step's prompt.
        input_ids = encode_ids(text_output, tokenizer)

    return generated_text
108
+
109
def botAw(text, model, tokenizer, params,file):
    """Generate a reply to ``text`` using fixed sampling settings.

    Args:
        text: Prompt passed on as ``config['text']``.
        model: Model object forwarded to ``generate_step_by_step``.
        tokenizer: Tokenizer object forwarded to ``generate_step_by_step``.
        params: Mapping whose ``'text'`` entry is used as the token budget
            (``config['maxsize']``).
        file: Path forwarded to ``generate_step_by_step``.

    Returns:
        Whatever ``generate_step_by_step`` returns for this configuration.
    """
    config = {
        'text': text,
        'do_sample': True,
        'temperature': 0.5,
        'top_k': 20,
        'top_p': 0.9,
        'maxsize': params["text"],
        'num_return_sequences': 1,
    }
    return generate_step_by_step(config, model, tokenizer, file)
121
+
122
# Load the model once at start-up and print its summary box.
print('Load GPT')
gpt_model_name = 'ifmain/StableGPT4-Micro-1.6B'
gpt_model, gpt_tokenizer, gpt_params = load_model(gpt_model_name)
print(model_log(gpt_params))
print()
127
+
128
+
129
# Text file holding the conversation that is read and extended on each turn.
file = 'file.txt'

try:
    with open(file, 'r', encoding='UTF-8') as f:
        text = f.read()
except FileNotFoundError:
    # Create an empty file on first run. Only a missing file is handled
    # here: any other read failure (e.g. a decoding error) must not
    # truncate an existing file.
    with open(file, 'w', encoding='UTF-8') as f:
        pass
138
+
139
+
140
def var2():
    """Run the interactive loop: wait for Enter, then extend ``file``.

    Each turn reads the whole of ``file``, passes it to ``botAw`` as the
    prompt, and rewrites ``file`` with the prompt followed by the returned
    text. The loop has no exit condition of its own.
    """
    while True:
        input('Press Enter to process')
        with open(file, 'r', encoding='UTF-8') as f:
            text = f.read()

        o = botAw(text, gpt_model, gpt_tokenizer, gpt_params, file)

        with open(file, 'w', encoding='UTF-8') as f:
            f.write(text + o)
152
+
153
# Start the interactive loop only when executed as a script, so importing
# this module does not block on input().
if __name__ == '__main__':
    var2()