joonavel commited on
Commit
9de3576
·
verified ·
1 Parent(s): db4cfaa

Update README.md

Browse files

add description of model

Files changed (1) hide show
  1. README.md +82 -2
README.md CHANGED
@@ -17,7 +17,9 @@ tags:
17
  # 100suping/Qwen2.5-Coder-34B-Instruct-kosql-adapter
18
 
19
  <!-- Provide a quick summary of what the model is/does. -->
20
- This Repo contains **LoRA (Low-Rank Adaptation) Adapter** for [unsloth/qwen2.5-coder-32b-instruct]
 
 
21
 
22
  This adapter was created through **instruction tuning**.
23
 
@@ -29,7 +31,6 @@ This adapter was created through **instruction tuning**.
29
  <!-- Provide a longer summary of what this model is. -->
30
 
31
 
32
-
33
  - **Base Model:** unsloth/Qwen2.5-Coder-32B-Instruct
34
  - **Task:** Instruction Following(Korean)
35
  - **Language:** English (or relevant language)
@@ -47,13 +48,92 @@ To use this LoRA adapter, refer to the following code:
47
  <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
 
49
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ```
51
 
 
52
  ### Inference
53
 
54
  <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
55
 
56
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ```
58
 
59
  ## Bias, Risks, and Limitations
 
17
  # 100suping/Qwen2.5-Coder-34B-Instruct-kosql-adapter
18
 
19
  <!-- Provide a quick summary of what the model is/does. -->
20
+ This Repo contains **LoRA (Low-Rank Adaptation) Adapter** for [unsloth/qwen2.5-coder-32b-instruct-bnb-4bit]
21
+
22
+ The Adapter was trained for improving model's SQL generation capability in Korean question & multi-db context.
23
 
24
  This adapter was created through **instruction tuning**.
25
 
 
31
  <!-- Provide a longer summary of what this model is. -->
32
 
33
 
 
34
  - **Base Model:** unsloth/Qwen2.5-Coder-32B-Instruct
35
  - **Task:** Instruction Following(Korean)
36
  - **Language:** English (or relevant language)
 
48
  <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
49
 
50
  ```
51
+ GENERAL_QUERY_PREFIX = """당신은 사용자의 입력을 MySQL 쿼리문으로 바꾸어주는 조직의 팀원입니다.
52
+ 당신의 임무는 DB 이름 그리고 DB내 테이블의 메타 정보가 담긴 아래의 (context)를 이용해서 주어진 질문(user_question)에 걸맞는 MySQL 쿼리문을 작성하는 것입니다.
53
+
54
+ (context)
55
+ {context}
56
+ """
57
+
58
+ GENERATE_QUERY_INSTRUCTIONS = """
59
+ 주어진 질문(user_question)에 대해서 문법적으로 올바른 MySQL 쿼리문을 작성해 주세요.
60
+ """
61
+ ```
62
+
63
+ ### Preprocess Functions
64
+
65
+ ```
66
+ def get_conversation_data(examples):
67
+ questions = examples['question']
68
+ schemas =examples['schema']
69
+ sql_queries =examples['SQL']
70
+ convos = []
71
+ for question, schema, sql in zip(questions, schemas, sql_queries):
72
+ conv = [
73
+ {"role": "system", "content": GENERAL_QUERY_PREFIX.format(context=schema) + GENERATE_QUERY_INSTRUCTIONS},
74
+ {"role": "user", "content": question},
75
+ {"role": "assistant", "content": "```sql\n"+sql+";\n```"}
76
+ ]
77
+ convos.append(conv)
78
+ return {"conversation":convos,}
79
+
80
+ def formatting_prompts_func(examples):
81
+ convos = examples["conversation"]
82
+ texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
83
+ return { "text" : texts, }
84
+ ```
85
+
86
+ ### Example input
87
+
88
+ ```
89
+ <|im_start|>system
90
+ ๋‹น์‹ ์€ ์‚ฌ์šฉ์ž์˜ ์ž…๋ ฅ์„ MySQL ์ฟผ๋ฆฌ๋ฌธ์œผ๋กœ ๋ฐ”๊พธ์–ด์ฃผ๋Š” ์กฐ์ง์˜ ํŒ€์›์ž…๋‹ˆ๋‹ค.
91
+ ๋‹น์‹ ์˜ ์ž„๋ฌด๋Š” DB ์ด๋ฆ„ ๊ทธ๋ฆฌ๊ณ  DB๋‚ด ํ…Œ์ด๋ธ”์˜ ๋ฉ”ํƒ€ ์ •๋ณด๊ฐ€ ๋‹ด๊ธด ์•„๋ž˜์˜ (context)๋ฅผ ์ด์šฉํ•ด์„œ ์ฃผ์–ด์ง„ ์งˆ๋ฌธ(user_question)์— ๊ฑธ๋งž๋Š” MySQL ์ฟผ๋ฆฌ๋ฌธ์„ ์ž‘์„ฑํ•˜๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค.
92
+
93
+ (context)
94
+ DB: movie_platform
95
+ table DDL: CREATE TABLE `movies` ( `movie_id` INTEGER `movie_title` TEXT `movie_release_year` INTEGER `movie_url` TEXT `movie_title_language` TEXT `movie_popularity` INTEGER `movie_image_url` TEXT `director_id` TEXT `director_name` TEXT `director_url` TEXT PRIMARY KEY (movie_id) FOREIGN KEY (user_id) REFERENCES `lists_users`(user_id) FOREIGN KEY (user_id) REFERENCES `lists_users`(user_id) FOREIGN KEY (user_id) REFERENCES `lists`(user_id) FOREIGN KEY (list_id) REFERENCES `lists`(list_id) FOREIGN KEY (user_id) REFERENCES `ratings_users`(user_id) FOREIGN KEY (user_id) REFERENCES `lists_users`(user_id) FOREIGN KEY (movie_id) REFERENCES `movies`(movie_id) );
96
+
97
+
98
+ 주어진 질문(user_question)에 대해서 문법적으로 올바른 MySQL 쿼리문을 작성해 주세요.
99
+ <|im_end|>
100
+ <|im_start|>user
101
+ 가장 인기 있는 영화는 무엇인가요? 그 영화는 언제 개봉되었고 누가 감독인가요?<|im_end|>
102
+ <|im_start|>assistant
103
+ ```sql
104
+ SELECT movie_title, movie_release_year, director_name FROM movies ORDER BY movie_popularity DESC LIMIT 1 ;
105
+ ```<|im_end|>
106
  ```
107
 
108
+
109
  ### Inference
110
 
111
  <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
112
 
113
  ```
114
+ messages = [
115
+ {"role": "system", "content": GENERAL_QUERY_PREFIX.format(context=context) + GENERATE_QUERY_INSTRUCTIONS},
116
+ {"role": "user", "content": "user_question: "+ user_question}
117
+ ]
118
+
119
+
120
+ text = tokenizer.apply_chat_template(
121
+ messages,
122
+ tokenize=False,
123
+ add_generation_prompt=True
124
+ )
125
+
126
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
127
+
128
+ generated_ids = model.generate(
129
+ **model_inputs,
130
+ max_new_tokens=max_new_tokens
131
+ )
132
+ generated_ids = [
133
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
134
+ ]
135
+
136
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
137
  ```
138
 
139
  ## Bias, Risks, and Limitations