Akjava committed
Commit ae274fc · 1 Parent(s): 745693f
Files changed (3)
  1. README.md +4 -4
  2. app.py +71 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
-title: Qwen2 05b
-emoji: 🦀
-colorFrom: blue
-colorTo: red
+title: qwen2-05b
+emoji: 🐠
+colorFrom: indigo
+colorTo: gray
 sdk: gradio
 sdk_version: 4.40.0
 app_file: app.py
app.py ADDED
@@ -0,0 +1,71 @@
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# Load the model and tokenizer
+model_name = "Qwen/Qwen1.5-0.5B-Chat"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+
+def generate_text(text):
+    # Tokenize the input text, including the attention mask
+    #input_ids = tokenizer(text, return_tensors="pt", padding=True)
+
+
+
+    messages = []
+    use_system_prompt = True
+    DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
+    if use_system_prompt:
+        messages = [
+            {"role": "system", "content": DEFAULT_SYSTEM_PROMPT}
+        ]
+
+    user_messages = [
+        {"role": "user", "content": text}
+    ]
+    messages += user_messages
+
+    prompt = tokenizer.apply_chat_template(
+        conversation=messages,
+        add_generation_prompt=True,
+        tokenize=False
+    )
+
+    input_datas = tokenizer(
+        prompt,
+        add_special_tokens=True,
+        return_tensors="pt"
+    )
+
+    # Generate text, passing the attention mask
+    generated_ids = model.generate(input_ids=input_datas.input_ids, attention_mask=input_datas.attention_mask, max_length=10000)
+    #generated_ids = model.generate(input_ids=input_ids, max_length=100)
+
+    # Decode only the newly generated tokens (skip the prompt tokens)
+    generated_text = tokenizer.decode(generated_ids[0][input_datas.input_ids.size(1):], skip_special_tokens=True)
+
+    # Print the generated text
+    #print(generated_text)
+    return generated_text
+
+from flask import Flask, request, jsonify
+
+app = Flask(__name__)
+#app.logger.disabled = True
+#log = logging.getLogger('werkzeug')
+#log.disabled = True
+
+@app.route('/')
+def predict():
+    param_value = request.args.get('param', '')
+    # Model inference logic goes here
+    #output = pipe(messages, **generation_args)
+    #text = (output[0]['generated_text'])
+    #print("hello")
+    #result = {"prediction": "dummy_result"}
+    text = generate_text(param_value)
+    return f"{text}"
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)
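
For reference, the added app.py exposes a single GET route on port 7860 that reads the prompt from the `param` query string and returns the generated text as a plain string. A minimal client sketch is shown below; the base URL and the `requests` dependency are assumptions for illustration, not part of the commit:

import requests

# Assumed base URL of the running Space; adjust to the actual deployment.
BASE_URL = "http://localhost:7860"

# The '/' route in app.py reads the prompt from the 'param' query parameter
# and returns the model's reply as plain text.
response = requests.get(BASE_URL, params={"param": "Give me a short introduction to large language models."})
print(response.text)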
requirements.txt ADDED
@@ -0,0 +1,5 @@
+llama-cpp-python
+transformers
+torch
+accelerate
+flask
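
To reproduce the environment locally, these requirements can be installed with pip; versions are unpinned in the commit, so this is only a sketch of the intended setup:

pip install llama-cpp-python transformers torch accelerate flask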