wangrongsheng
committed on
Upload 82 files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- LLM-Detector-V7-11w/src/api_demo.py +15 -0
- LLM-Detector-V7-11w/src/cli_demo.py +47 -0
- LLM-Detector-V7-11w/src/evaluate.py +10 -0
- LLM-Detector-V7-11w/src/export_model.py +9 -0
- LLM-Detector-V7-11w/src/llmtuner/__init__.py +10 -0
- LLM-Detector-V7-11w/src/llmtuner/api/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/api/app.py +195 -0
- LLM-Detector-V7-11w/src/llmtuner/api/protocol.py +96 -0
- LLM-Detector-V7-11w/src/llmtuner/chat/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/chat/chat_model.py +171 -0
- LLM-Detector-V7-11w/src/llmtuner/data/__init__.py +4 -0
- LLM-Detector-V7-11w/src/llmtuner/data/loader.py +173 -0
- LLM-Detector-V7-11w/src/llmtuner/data/preprocess.py +272 -0
- LLM-Detector-V7-11w/src/llmtuner/data/template.py +815 -0
- LLM-Detector-V7-11w/src/llmtuner/data/utils.py +51 -0
- LLM-Detector-V7-11w/src/llmtuner/eval/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/eval/evaluator.py +118 -0
- LLM-Detector-V7-11w/src/llmtuner/eval/template.py +86 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/__init__.py +0 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/callbacks.py +148 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/constants.py +707 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/logging.py +49 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/misc.py +196 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/packages.py +49 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/patches/__init__.py +0 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/patches/llama_patch.py +224 -0
- LLM-Detector-V7-11w/src/llmtuner/extras/ploting.py +55 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/__init__.py +5 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/data_args.py +189 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/evaluation_args.py +55 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/finetuning_args.py +188 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/generating_args.py +53 -0
- LLM-Detector-V7-11w/src/llmtuner/hparams/model_args.py +127 -0
- LLM-Detector-V7-11w/src/llmtuner/model/__init__.py +5 -0
- LLM-Detector-V7-11w/src/llmtuner/model/adapter.py +133 -0
- LLM-Detector-V7-11w/src/llmtuner/model/loader.py +129 -0
- LLM-Detector-V7-11w/src/llmtuner/model/parser.py +246 -0
- LLM-Detector-V7-11w/src/llmtuner/model/patcher.py +288 -0
- LLM-Detector-V7-11w/src/llmtuner/model/utils.py +129 -0
- LLM-Detector-V7-11w/src/llmtuner/train/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/train/dpo/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/train/dpo/collator.py +51 -0
- LLM-Detector-V7-11w/src/llmtuner/train/dpo/trainer.py +159 -0
- LLM-Detector-V7-11w/src/llmtuner/train/dpo/workflow.py +82 -0
- LLM-Detector-V7-11w/src/llmtuner/train/ppo/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/train/ppo/trainer.py +374 -0
- LLM-Detector-V7-11w/src/llmtuner/train/ppo/utils.py +49 -0
- LLM-Detector-V7-11w/src/llmtuner/train/ppo/workflow.py +103 -0
- LLM-Detector-V7-11w/src/llmtuner/train/pt/__init__.py +1 -0
- LLM-Detector-V7-11w/src/llmtuner/train/pt/workflow.py +62 -0
LLM-Detector-V7-11w/src/api_demo.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import uvicorn
|
3 |
+
|
4 |
+
from llmtuner import ChatModel, create_app
|
5 |
+
|
6 |
+
|
7 |
+
def main():
    r"""
    Builds a ChatModel, wraps it with `create_app` and serves the app with uvicorn.

    The port comes from the `API_PORT` environment variable (8000 when unset).
    """
    app = create_app(ChatModel())
    # The raw value is echoed unchanged in the hint and only parsed for uvicorn.
    port = os.environ.get("API_PORT", 8000)
    print("Visit http://localhost:{}/docs for API document.".format(port))
    uvicorn.run(app, host="0.0.0.0", port=int(port), workers=1)


if __name__ == "__main__":
    main()
|
LLM-Detector-V7-11w/src/cli_demo.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llmtuner import ChatModel
|
2 |
+
from llmtuner.extras.misc import torch_gc
|
3 |
+
|
4 |
+
try:
|
5 |
+
import platform
|
6 |
+
if platform.system() != "Windows":
|
7 |
+
import readline
|
8 |
+
except ImportError:
|
9 |
+
print("Install `readline` for a better experience.")
|
10 |
+
|
11 |
+
|
12 |
+
def main():
    r"""
    Runs an interactive chat loop on the terminal.

    Typing `exit` leaves the loop; typing `clear` empties the history and calls `torch_gc`.
    Every other line is sent to the model together with the accumulated history, and the
    reply is printed piece by piece as it is streamed.
    """
    chat_model = ChatModel()
    history = []
    print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.")

    while True:
        try:
            query = input("\nUser: ")
        except UnicodeDecodeError:
            print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
            continue

        command = query.strip()
        if command == "exit":
            break

        if command == "clear":
            history = []
            torch_gc()
            print("History has been removed.")
            continue

        print("Assistant: ", end="", flush=True)

        pieces = []
        for new_text in chat_model.stream_chat(query, history):
            print(new_text, end="", flush=True)
            pieces.append(new_text)
        print()

        # Rebind to a new list instead of mutating the one handed to stream_chat.
        history = history + [(query, "".join(pieces))]


if __name__ == "__main__":
    main()
|
LLM-Detector-V7-11w/src/evaluate.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llmtuner import Evaluator
|
2 |
+
|
3 |
+
|
4 |
+
def main():
    r"""
    Creates an `Evaluator` with its default arguments and runs its `eval` method.
    """
    Evaluator().eval()


if __name__ == "__main__":
    main()
|
LLM-Detector-V7-11w/src/export_model.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llmtuner import export_model
|
2 |
+
|
3 |
+
|
4 |
+
def main():
    r"""
    Script entry point: delegates to `llmtuner.export_model` without arguments.
    """
    export_model()


if __name__ == "__main__":
    main()
|
LLM-Detector-V7-11w/src/llmtuner/__init__.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Level: api, webui > chat, eval, train > data, model > extras, hparams
|
2 |
+
|
3 |
+
from llmtuner.api import create_app
|
4 |
+
from llmtuner.chat import ChatModel
|
5 |
+
from llmtuner.eval import Evaluator
|
6 |
+
from llmtuner.train import export_model, run_exp
|
7 |
+
from llmtuner.webui import create_ui, create_web_demo
|
8 |
+
|
9 |
+
|
10 |
+
__version__ = "0.4.0"
|
LLM-Detector-V7-11w/src/llmtuner/api/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.api.app import create_app
|
LLM-Detector-V7-11w/src/llmtuner/api/app.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import asyncio
|
4 |
+
from typing import List, Tuple
|
5 |
+
from pydantic import BaseModel
|
6 |
+
from contextlib import asynccontextmanager
|
7 |
+
|
8 |
+
from llmtuner.api.protocol import (
|
9 |
+
Role,
|
10 |
+
Finish,
|
11 |
+
ModelCard,
|
12 |
+
ModelList,
|
13 |
+
ChatMessage,
|
14 |
+
DeltaMessage,
|
15 |
+
ChatCompletionRequest,
|
16 |
+
ChatCompletionResponse,
|
17 |
+
ChatCompletionStreamResponse,
|
18 |
+
ChatCompletionResponseChoice,
|
19 |
+
ChatCompletionResponseStreamChoice,
|
20 |
+
ChatCompletionResponseUsage,
|
21 |
+
ScoreEvaluationRequest,
|
22 |
+
ScoreEvaluationResponse
|
23 |
+
)
|
24 |
+
from llmtuner.chat import ChatModel
|
25 |
+
from llmtuner.extras.misc import torch_gc
|
26 |
+
from llmtuner.extras.packages import (
|
27 |
+
is_fastapi_availble, is_starlette_available, is_uvicorn_available
|
28 |
+
)
|
29 |
+
|
30 |
+
|
31 |
+
if is_fastapi_availble():
|
32 |
+
from fastapi import FastAPI, HTTPException, status
|
33 |
+
from fastapi.middleware.cors import CORSMiddleware
|
34 |
+
|
35 |
+
|
36 |
+
if is_starlette_available():
|
37 |
+
from sse_starlette import EventSourceResponse
|
38 |
+
|
39 |
+
|
40 |
+
if is_uvicorn_available():
|
41 |
+
import uvicorn
|
42 |
+
|
43 |
+
|
44 |
+
@asynccontextmanager
async def lifespan(app: "FastAPI"):
    r"""
    Lifespan hook passed to FastAPI: nothing happens before the yield; once the
    context is exited, `torch_gc` is called to collect GPU memory.
    """
    yield
    torch_gc()
|
48 |
+
|
49 |
+
|
50 |
+
def to_json(data: "BaseModel") -> str:
    r"""
    Serializes a pydantic model to a JSON string, omitting fields that were never set.

    Non-ASCII characters are written as-is (`ensure_ascii=False`).

    Args:
        data: a pydantic model; v2 models expose `model_dump`, v1 models expose `json`.

    Returns:
        The JSON text.

    Raises:
        Whatever the model's own dump method or `json.dumps` raises. Errors from the v2
        path are propagated instead of being hidden behind a retry through the v1 API.
    """
    model_dump = getattr(data, "model_dump", None)
    if model_dump is None:  # pydantic v1
        return data.json(exclude_unset=True, ensure_ascii=False)

    # pydantic v2
    return json.dumps(model_dump(exclude_unset=True), ensure_ascii=False)
|
55 |
+
|
56 |
+
|
57 |
+
def create_app(chat_model: "ChatModel") -> "FastAPI":
    r"""
    Builds the FastAPI application that exposes `chat_model` over HTTP.

    Registered routes:
        GET  /v1/models            - lists a single fixed model card
        POST /v1/chat/completions  - chat completion, optionally streamed as server-sent events
        POST /v1/score/evaluation  - scores a batch of texts

    Model calls run in the event loop's default executor and are guarded by a semaphore
    whose size is read from the `MAX_CONCURRENT` environment variable (1 when unset).
    """
    app = FastAPI(lifespan=lifespan)

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    max_concurrent = int(os.environ.get("MAX_CONCURRENT", 1))
    semaphore = asyncio.Semaphore(max_concurrent)

    @app.get("/v1/models", response_model=ModelList)
    async def list_models():
        return ModelList(data=[ModelCard(id="gpt-3.5-turbo")])

    @app.post("/v1/chat/completions", response_model=ChatCompletionResponse, status_code=status.HTTP_200_OK)
    async def create_chat_completion(request: ChatCompletionRequest):
        # This endpoint is refused when the loaded model cannot generate.
        if not chat_model.can_generate:
            raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")

        # The final message must be the user's query.
        if len(request.messages) == 0 or request.messages[-1].role != Role.USER:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")

        *prev_messages, last_message = request.messages
        query = last_message.content

        # An optional leading system message is split off from the history.
        system = None
        if prev_messages and prev_messages[0].role == Role.SYSTEM:
            system = prev_messages.pop(0).content

        # What remains must be complete (user, assistant) pairs.
        if len(prev_messages) % 2 != 0:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")

        history = []
        for user_msg, assistant_msg in zip(prev_messages[0::2], prev_messages[1::2]):
            if user_msg.role != Role.USER or assistant_msg.role != Role.ASSISTANT:
                raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")
            history.append([user_msg.content, assistant_msg.content])

        # NOTE(review): for streaming requests the semaphore only covers building the
        # EventSourceResponse; the generator is consumed after it is released. Confirm
        # whether that is intended.
        async with semaphore:
            return await asyncio.get_running_loop().run_in_executor(
                None, chat_completion, query, history, system, request
            )

    def chat_completion(query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest):
        # Streaming requests get an SSE response wrapping the chunk generator.
        if request.stream:
            return EventSourceResponse(
                stream_chat_completion(query, history, system, request),
                media_type="text/event-stream"
            )

        responses = chat_model.chat(
            query, history, system,
            do_sample=request.do_sample,
            temperature=request.temperature,
            top_p=request.top_p,
            max_new_tokens=request.max_tokens,
            num_return_sequences=request.n
        )

        choices = [
            ChatCompletionResponseChoice(
                index=i,
                message=ChatMessage(role=Role.ASSISTANT, content=response.response_text),
                finish_reason=Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH
            )
            for i, response in enumerate(responses)
        ]
        # The prompt length is taken from the last response; completion tokens are summed.
        prompt_length = responses[-1].prompt_length if len(responses) else 0
        response_length = sum(response.response_length for response in responses)

        usage = ChatCompletionResponseUsage(
            prompt_tokens=prompt_length,
            completion_tokens=response_length,
            total_tokens=prompt_length+response_length
        )

        return ChatCompletionResponse(model=request.model, choices=choices, usage=usage)

    def stream_chat_completion(query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest):
        def make_chunk(delta, finish_reason):
            # finish_reason is always passed explicitly so it counts as "set" and is
            # kept by to_json (which excludes unset fields), even when it is None.
            choice_data = ChatCompletionResponseStreamChoice(
                index=0,
                delta=delta,
                finish_reason=finish_reason
            )
            return to_json(ChatCompletionStreamResponse(model=request.model, choices=[choice_data]))

        # The opening chunk announces the assistant role with empty content.
        yield make_chunk(DeltaMessage(role=Role.ASSISTANT, content=""), None)

        for new_text in chat_model.stream_chat(
            query, history, system,
            do_sample=request.do_sample,
            temperature=request.temperature,
            top_p=request.top_p,
            max_new_tokens=request.max_tokens
        ):
            if len(new_text) == 0:
                continue

            yield make_chunk(DeltaMessage(content=new_text), None)

        # The closing chunk carries the stop reason, followed by the end marker.
        yield make_chunk(DeltaMessage(), Finish.STOP)
        yield "[DONE]"

    @app.post("/v1/score/evaluation", response_model=ScoreEvaluationResponse, status_code=status.HTTP_200_OK)
    async def create_score_evaluation(request: ScoreEvaluationRequest):
        # Scoring is refused when the loaded model is a generator.
        if chat_model.can_generate:
            raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")

        if len(request.messages) == 0:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")

        async with semaphore:
            return await asyncio.get_running_loop().run_in_executor(None, get_score, request)

    def get_score(request: ScoreEvaluationRequest):
        return ScoreEvaluationResponse(
            model=request.model,
            scores=chat_model.get_scores(request.messages, max_length=request.max_length)
        )

    return app
|
190 |
+
|
191 |
+
|
192 |
+
if __name__ == "__main__":
    # Running this module directly serves the app on API_PORT (8000 when unset).
    chat_model = ChatModel()
    app = create_app(chat_model)
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("API_PORT", 8000)),
        workers=1
    )
|
LLM-Detector-V7-11w/src/llmtuner/api/protocol.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
from enum import Enum
|
3 |
+
from pydantic import BaseModel, Field
|
4 |
+
from typing import List, Optional
|
5 |
+
|
6 |
+
|
7 |
+
class Role(str, Enum):
    # Author of a chat message; members are plain strings.
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
|
11 |
+
|
12 |
+
|
13 |
+
class Finish(str, Enum):
    # Reason reported for the end of a completion; members are plain strings.
    STOP = "stop"
    LENGTH = "length"
|
16 |
+
|
17 |
+
|
18 |
+
class ModelCard(BaseModel):
    # One entry of the model listing.
    id: str
    object: Optional[str] = "model"
    # Defaults to the Unix time (whole seconds) at which the card is constructed.
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    owned_by: Optional[str] = "owner"
|
23 |
+
|
24 |
+
|
25 |
+
class ModelList(BaseModel):
    # Container for a list of model cards.
    object: Optional[str] = "list"
    data: Optional[List[ModelCard]] = []
|
28 |
+
|
29 |
+
|
30 |
+
class ChatMessage(BaseModel):
    # A complete chat message: who wrote it and what it says.
    role: Role
    content: str
|
33 |
+
|
34 |
+
|
35 |
+
class DeltaMessage(BaseModel):
    # Partial message carried by a streamed chunk; both fields may be absent.
    role: Optional[Role] = None
    content: Optional[str] = None
|
38 |
+
|
39 |
+
|
40 |
+
class ChatCompletionRequest(BaseModel):
    # Body of a chat completion request.
    model: str
    messages: List[ChatMessage]
    # Generation options; None means the request did not set a value.
    do_sample: Optional[bool] = True
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    n: Optional[int] = 1
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
|
49 |
+
|
50 |
+
|
51 |
+
class ChatCompletionResponseChoice(BaseModel):
    # One generated answer in a non-streamed completion.
    index: int
    message: ChatMessage
    finish_reason: Finish
|
55 |
+
|
56 |
+
|
57 |
+
class ChatCompletionResponseStreamChoice(BaseModel):
    # One choice inside a streamed chunk; finish_reason defaults to None.
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Finish] = None
|
61 |
+
|
62 |
+
|
63 |
+
class ChatCompletionResponseUsage(BaseModel):
    # Token accounting attached to a non-streamed completion.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
|
67 |
+
|
68 |
+
|
69 |
+
class ChatCompletionResponse(BaseModel):
    # Full (non-streamed) chat completion response.
    id: Optional[str] = "chatcmpl-default"
    object: Optional[str] = "chat.completion"
    # Defaults to the Unix time (whole seconds) at which the response is constructed.
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: ChatCompletionResponseUsage
|
76 |
+
|
77 |
+
|
78 |
+
class ChatCompletionStreamResponse(BaseModel):
    # One chunk of a streamed chat completion.
    id: Optional[str] = "chatcmpl-default"
    object: Optional[str] = "chat.completion.chunk"
    # Defaults to the Unix time (whole seconds) at which the chunk is constructed.
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
|
84 |
+
|
85 |
+
|
86 |
+
class ScoreEvaluationRequest(BaseModel):
    # Body of a score evaluation request: a batch of texts to score.
    model: str
    messages: List[str]
    max_length: Optional[int] = None
|
90 |
+
|
91 |
+
|
92 |
+
class ScoreEvaluationResponse(BaseModel):
    # Result of a score evaluation request.
    id: Optional[str] = "scoreeval-default"
    object: Optional[str] = "score.evaluation"
    model: str
    scores: List[float]
|
LLM-Detector-V7-11w/src/llmtuner/chat/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.chat.chat_model import ChatModel
|
LLM-Detector-V7-11w/src/llmtuner/chat/chat_model.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import tiktoken
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from typing import Any, Dict, Generator, List, Literal, Optional, Tuple
|
5 |
+
from threading import Thread
|
6 |
+
from transformers import GenerationConfig, TextIteratorStreamer
|
7 |
+
|
8 |
+
from llmtuner.data.template import get_template_and_fix_tokenizer
|
9 |
+
from llmtuner.extras.misc import get_logits_processor
|
10 |
+
from llmtuner.model import dispatch_model, get_infer_args, load_model_and_tokenizer
|
11 |
+
|
12 |
+
|
13 |
+
@dataclass
class Response:
    r"""
    Result of one generated sequence, as returned by `ChatModel.chat`.
    """

    # decoded text of the generated part
    response_text: str
    # number of generated tokens (counted up to and including the first EOS when present)
    response_length: int
    # number of tokens in the encoded prompt
    prompt_length: int
    # "stop" when an EOS token was produced, otherwise "length"
    finish_reason: Literal["stop", "length"]
|
20 |
+
|
21 |
+
|
22 |
+
class ChatModel:
    r"""
    Inference wrapper that bundles a loaded model, its tokenizer and the prompt template.
    """

    def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
        r"""
        Loads the model and tokenizer from the inference arguments parsed out of `args`.

        `can_generate` is True when the finetuning stage is "sft"; for any other stage the
        model is loaded with a value head (`add_valuehead=True`).
        """
        model_args, data_args, finetuning_args, generating_args = get_infer_args(args)
        self.generating_args = generating_args
        self.can_generate = finetuning_args.stage == "sft"
        self.model, self.tokenizer = load_model_and_tokenizer(
            model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate)
        )
        # Pad on the left when generating, on the right otherwise.
        if self.can_generate:
            self.tokenizer.padding_side = "left"
        else:
            self.tokenizer.padding_side = "right"
        self.model = dispatch_model(self.model)
        self.template = get_template_and_fix_tokenizer(data_args.template, self.tokenizer)

    def _process_args(
        self,
        query: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None,
        **input_kwargs
    ) -> Tuple[Dict[str, Any], int]:
        r"""
        Encodes the prompt and assembles the keyword arguments for `model.generate`.

        Recognized overrides in `input_kwargs`: do_sample, temperature, top_p, top_k,
        num_return_sequences, repetition_penalty, max_length, max_new_tokens. A falsy
        override (None, 0, 0.0) falls back to the configured value, except `do_sample`,
        which only falls back when it is None.

        Returns: (generate kwargs, prompt length in tokens)
        """
        prompt, _ = self.template.encode_oneturn(
            tokenizer=self.tokenizer, query=query, resp="", history=history, system=system
        )
        input_ids = torch.tensor([prompt], device=self.model.device)

        override = input_kwargs.get
        do_sample = override("do_sample")
        num_return_sequences = override("num_return_sequences")
        max_length = override("max_length")
        max_new_tokens = override("max_new_tokens")

        generating_args = self.generating_args.to_dict()
        if do_sample is None:
            do_sample = generating_args["do_sample"]

        generating_args.update(
            do_sample=do_sample,
            temperature=override("temperature") or generating_args["temperature"],
            top_p=override("top_p") or generating_args["top_p"],
            top_k=override("top_k") or generating_args["top_k"],
            num_return_sequences=num_return_sequences or 1,
            repetition_penalty=override("repetition_penalty") or generating_args["repetition_penalty"],
            eos_token_id=[self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids,
            pad_token_id=self.tokenizer.pad_token_id
        )

        # Asking for several sequences switches sampling on.
        if isinstance(num_return_sequences, int) and num_return_sequences > 1:
            generating_args["do_sample"] = True

        # The two length limits replace each other; max_new_tokens wins when both are given.
        if max_length:
            generating_args.pop("max_new_tokens", None)
            generating_args["max_length"] = max_length

        if max_new_tokens:
            generating_args.pop("max_length", None)
            generating_args["max_new_tokens"] = max_new_tokens

        gen_kwargs = {
            "inputs": input_ids,
            "generation_config": GenerationConfig(**generating_args),
            "logits_processor": get_logits_processor()
        }

        return gen_kwargs, len(prompt)

    @torch.inference_mode()
    def chat(
        self,
        query: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None,
        **input_kwargs
    ) -> List[Response]:
        r"""
        Args: query, history, system, **input_kwargs

        Returns: one `Response` per generated sequence (a single one by default)
        """
        gen_kwargs, prompt_length = self._process_args(query, history, system, **input_kwargs)
        generate_output = self.model.generate(**gen_kwargs)
        # Drop the prompt tokens so that only the generated part is decoded.
        response_ids = generate_output[:, prompt_length:]
        texts = self.tokenizer.batch_decode(
            response_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )

        results = []
        for token_ids, text in zip(response_ids, texts):
            eos_index = (token_ids == self.tokenizer.eos_token_id).nonzero()
            hit_eos = len(eos_index) > 0
            # Count tokens up to and including the first EOS, or the whole row without one.
            response_length = (eos_index[0].item() + 1) if hit_eos else len(token_ids)
            results.append(Response(
                response_text=text,
                response_length=response_length,
                prompt_length=prompt_length,
                finish_reason="stop" if hit_eos else "length"
            ))

        return results

    @torch.inference_mode()
    def stream_chat(
        self,
        query: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None,
        **input_kwargs
    ) -> Generator[str, None, None]:
        r"""
        Args: query, history, system, **input_kwargs

        Yields: decoded text pieces as the streamer produces them; generation itself
        runs in a separate thread.
        """
        gen_kwargs, _ = self._process_args(query, history, system, **input_kwargs)
        streamer = TextIteratorStreamer(self.tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs["streamer"] = streamer

        Thread(target=self.model.generate, kwargs=gen_kwargs).start()

        yield from streamer

    @torch.inference_mode()
    def get_scores(
        self,
        batch_input: List[str],
        **input_kwargs
    ) -> List[float]:
        r"""
        Args: batch_input, **input_kwargs (only `max_length` is read)

        Returns: one float per input text, read from the model's value output at the
        position of the last non-pad token (NaN is replaced via `nan_to_num`)
        """
        if isinstance(getattr(self.tokenizer, "tokenizer", None), tiktoken.Encoding):  # for tiktoken tokenizer (Qwen)
            kwargs = {"allowed_special": "all"}
        else:
            kwargs = {"add_special_tokens": True}

        max_length = input_kwargs.pop("max_length", None)
        device = getattr(self.model.pretrained_model, "device", "cuda")

        inputs = self.tokenizer(
            batch_input,
            padding=True,
            truncation=True,
            max_length=max_length or getattr(self.model.config, "max_position_embeddings", 1024),
            return_tensors="pt",
            **kwargs
        ).to(device)

        input_ids: torch.Tensor = inputs["input_ids"]
        _, _, values = self.model(**inputs, output_hidden_states=True, return_dict=True)

        # chatglm values are transposed so that the first axis matches the inputs
        # (presumably they come out sequence-first; TODO confirm).
        if getattr(self.model.config, "model_type", None) == "chatglm":
            values = torch.transpose(values, 0, 1)

        scores = []
        for row, token_ids in enumerate(input_ids):
            end_indexes = (token_ids != self.tokenizer.pad_token_id).nonzero()
            # Position of the last non-pad token; 0 when the row is all padding.
            end_index = end_indexes[-1].item() if len(end_indexes) else 0
            scores.append(values[row, end_index].nan_to_num().item())

        return scores
|
LLM-Detector-V7-11w/src/llmtuner/data/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llmtuner.data.loader import get_dataset
|
2 |
+
from llmtuner.data.preprocess import preprocess_dataset
|
3 |
+
from llmtuner.data.template import get_template_and_fix_tokenizer
|
4 |
+
from llmtuner.data.utils import split_dataset
|
LLM-Detector-V7-11w/src/llmtuner/data/loader.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import TYPE_CHECKING, Any, Dict, List, Union
|
3 |
+
|
4 |
+
from datasets import concatenate_datasets, interleave_datasets, load_dataset, load_from_disk
|
5 |
+
|
6 |
+
from llmtuner.data.utils import checksum
|
7 |
+
from llmtuner.extras.constants import FILEEXT2TYPE
|
8 |
+
from llmtuner.extras.logging import get_logger
|
9 |
+
|
10 |
+
if TYPE_CHECKING:
|
11 |
+
from datasets import Dataset, IterableDataset
|
12 |
+
from llmtuner.hparams import ModelArguments, DataArguments
|
13 |
+
|
14 |
+
|
15 |
+
logger = get_logger(__name__)
|
16 |
+
|
17 |
+
|
18 |
+
def get_dataset(
|
19 |
+
model_args: "ModelArguments",
|
20 |
+
data_args: "DataArguments"
|
21 |
+
) -> Union["Dataset", "IterableDataset"]:
|
22 |
+
max_samples = data_args.max_samples
|
23 |
+
all_datasets: List[Union["Dataset", "IterableDataset"]] = [] # support multiple datasets
|
24 |
+
|
25 |
+
if data_args.cache_path is not None:
|
26 |
+
if os.path.exists(data_args.cache_path):
|
27 |
+
logger.warning("Loading dataset from disk will ignore other data arguments.")
|
28 |
+
dataset = load_from_disk(data_args.cache_path)
|
29 |
+
if data_args.streaming:
|
30 |
+
dataset = dataset.to_iterable_dataset()
|
31 |
+
return dataset
|
32 |
+
elif data_args.streaming:
|
33 |
+
raise ValueError("Turn off dataset streaming to save cache files.")
|
34 |
+
|
35 |
+
for dataset_attr in data_args.dataset_list:
|
36 |
+
logger.info("Loading dataset {}...".format(dataset_attr))
|
37 |
+
|
38 |
+
data_path, data_name, data_dir, data_files = None, None, None, None
|
39 |
+
if dataset_attr.load_from in ["hf_hub", "ms_hub"]:
|
40 |
+
data_path = dataset_attr.dataset_name
|
41 |
+
data_name = dataset_attr.subset
|
42 |
+
data_dir = dataset_attr.folder
|
43 |
+
elif dataset_attr.load_from == "script":
|
44 |
+
data_path = os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)
|
45 |
+
data_name = dataset_attr.subset
|
46 |
+
elif dataset_attr.load_from == "file":
|
47 |
+
data_files = []
|
48 |
+
local_path: str = os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)
|
49 |
+
if os.path.isdir(local_path): # is directory
|
50 |
+
for file_name in os.listdir(local_path):
|
51 |
+
data_files.append(os.path.join(local_path, file_name))
|
52 |
+
if data_path is None:
|
53 |
+
data_path = FILEEXT2TYPE.get(file_name.split(".")[-1], None)
|
54 |
+
else:
|
55 |
+
assert data_path == FILEEXT2TYPE.get(file_name.split(".")[-1], None), "file types are not identical."
|
56 |
+
elif os.path.isfile(local_path): # is file
|
57 |
+
data_files.append(local_path)
|
58 |
+
data_path = FILEEXT2TYPE.get(local_path.split(".")[-1], None)
|
59 |
+
else:
|
60 |
+
raise ValueError("File not found.")
|
61 |
+
|
62 |
+
assert data_path, "File extension must be txt, csv, json or jsonl."
|
63 |
+
checksum(data_files, dataset_attr.dataset_sha1)
|
64 |
+
else:
|
65 |
+
raise NotImplementedError
|
66 |
+
|
67 |
+
if dataset_attr.load_from == "ms_hub":
|
68 |
+
try:
|
69 |
+
from modelscope import MsDataset
|
70 |
+
from modelscope.utils.config_ds import MS_DATASETS_CACHE
|
71 |
+
|
72 |
+
cache_dir = model_args.cache_dir or MS_DATASETS_CACHE
|
73 |
+
dataset = MsDataset.load(
|
74 |
+
dataset_name=data_path,
|
75 |
+
subset_name=data_name,
|
76 |
+
data_dir=data_dir,
|
77 |
+
data_files=data_files,
|
78 |
+
split=data_args.split,
|
79 |
+
cache_dir=cache_dir,
|
80 |
+
token=model_args.ms_hub_token,
|
81 |
+
use_streaming=(data_args.streaming and (dataset_attr.load_from != "file"))
|
82 |
+
).to_hf_dataset()
|
83 |
+
except ImportError:
|
84 |
+
raise ImportError("Please install modelscope via `pip install modelscope -U`")
|
85 |
+
else:
|
86 |
+
dataset = load_dataset(
|
87 |
+
path=data_path,
|
88 |
+
name=data_name,
|
89 |
+
data_dir=data_dir,
|
90 |
+
data_files=data_files,
|
91 |
+
split=data_args.split,
|
92 |
+
cache_dir=model_args.cache_dir,
|
93 |
+
token=model_args.hf_hub_token,
|
94 |
+
streaming=(data_args.streaming and (dataset_attr.load_from != "file"))
|
95 |
+
)
|
96 |
+
|
97 |
+
if data_args.streaming and (dataset_attr.load_from == "file"): # faster than specifying streaming=True
|
98 |
+
dataset = dataset.to_iterable_dataset() # TODO: add num shards parameter
|
99 |
+
|
100 |
+
if max_samples is not None: # truncate dataset
|
101 |
+
dataset = dataset.select(range(min(len(dataset), max_samples)))
|
102 |
+
|
103 |
+
def convert_format(examples: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
    r"""
    Converts a batch of sharegpt-style conversations into alpaca-style columns.

    Each conversation is read from the `dataset_attr.messages` column and split into
    consecutive (user, assistant) message pairs. The last pair becomes prompt/response,
    the earlier pairs become the history. Conversations with fewer than two messages
    produce no output row.

    Raises:
        ValueError: if the roles of a later pair differ from the roles of the first pair.
    """
    role_key, content_key = dataset_attr.role, dataset_attr.content
    prompts, queries, responses, histories, systems = [], [], [], [], []

    for row_idx, messages in enumerate(examples[dataset_attr.messages]):
        usable = len(messages) // 2 * 2  # a trailing unpaired message is dropped
        messages = messages[:usable]
        if usable == 0:
            continue

        pairs = []
        first_role = second_role = None
        for pos in range(0, usable, 2):
            asker, answerer = messages[pos], messages[pos + 1]
            if first_role is None and second_role is None:
                # the first pair fixes which role speaks first and which replies
                first_role, second_role = asker[role_key], answerer[role_key]
            elif asker[role_key] != first_role or answerer[role_key] != second_role:
                raise ValueError("Only accepts conversation in u/a/u/a/u/a order.")
            pairs.append((asker[content_key], answerer[content_key]))

        if pairs:
            last_prompt, last_response = pairs[-1]
            prompts.append(last_prompt)
            queries.append("")
            responses.append(last_response)
            histories.append(pairs[:-1] if len(pairs) > 1 else None)
            systems.append(examples[dataset_attr.system][row_idx] if dataset_attr.system else "")

    return {
        "prompt": prompts,
        "query": queries,
        "response": responses,
        "history": histories,
        "system": systems
    }
|
133 |
+
|
134 |
+
if dataset_attr.formatting == "sharegpt": # convert format
|
135 |
+
column_names = list(next(iter(dataset)).keys())
|
136 |
+
kwargs = {}
|
137 |
+
if not data_args.streaming:
|
138 |
+
kwargs = dict(
|
139 |
+
num_proc=data_args.preprocessing_num_workers,
|
140 |
+
load_from_cache_file=(not data_args.overwrite_cache),
|
141 |
+
desc="Converting format of dataset"
|
142 |
+
)
|
143 |
+
|
144 |
+
dataset = dataset.map(
|
145 |
+
convert_format,
|
146 |
+
batched=True,
|
147 |
+
remove_columns=column_names,
|
148 |
+
**kwargs
|
149 |
+
)
|
150 |
+
else:
|
151 |
+
for column_name in ["prompt", "query", "response", "history", "system"]: # align dataset
|
152 |
+
if getattr(dataset_attr, column_name) and getattr(dataset_attr, column_name) != column_name:
|
153 |
+
dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name)
|
154 |
+
|
155 |
+
all_datasets.append(dataset)
|
156 |
+
|
157 |
+
if len(data_args.dataset_list) == 1:
|
158 |
+
return all_datasets[0]
|
159 |
+
elif data_args.mix_strategy == "concat":
|
160 |
+
if data_args.streaming:
|
161 |
+
logger.warning("The samples between different datasets will not be mixed in streaming mode.")
|
162 |
+
return concatenate_datasets(all_datasets)
|
163 |
+
elif data_args.mix_strategy.startswith("interleave"):
|
164 |
+
if not data_args.streaming:
|
165 |
+
logger.warning("We recommend using `mix_strategy=concat` in non-streaming mode.")
|
166 |
+
return interleave_datasets(
|
167 |
+
datasets=all_datasets,
|
168 |
+
probabilities=data_args.interleave_probs,
|
169 |
+
seed=data_args.seed,
|
170 |
+
stopping_strategy="first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted"
|
171 |
+
)
|
172 |
+
else:
|
173 |
+
raise ValueError("Unknown mixing strategy.")
|
LLM-Detector-V7-11w/src/llmtuner/data/preprocess.py
ADDED
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tiktoken
|
3 |
+
from itertools import chain
|
4 |
+
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Literal, Tuple, Union
|
5 |
+
|
6 |
+
from llmtuner.data.template import get_template_and_fix_tokenizer
|
7 |
+
from llmtuner.extras.constants import IGNORE_INDEX
|
8 |
+
from llmtuner.extras.logging import get_logger
|
9 |
+
|
10 |
+
if TYPE_CHECKING:
|
11 |
+
from datasets import Dataset, IterableDataset
|
12 |
+
from transformers import Seq2SeqTrainingArguments
|
13 |
+
from transformers.tokenization_utils import PreTrainedTokenizer
|
14 |
+
from llmtuner.hparams import DataArguments
|
15 |
+
|
16 |
+
|
17 |
+
logger = get_logger(__name__)
|
18 |
+
|
19 |
+
|
20 |
+
def construct_example(examples: Dict[str, List[Any]]) -> Generator[Any, None, None]:
    r"""
    Yields one `(query, response, history, system)` tuple per row of a batch.

    The "query" column, when present and non-empty for a row, is appended to the
    prompt after a newline. Missing "history" / "system" columns yield `None`.
    """
    has_query = "query" in examples
    has_history = "history" in examples
    has_system = "system" in examples

    for row, query in enumerate(examples["prompt"]):
        response = examples["response"][row]

        extra = examples["query"][row] if has_query else None
        if extra:
            query = query + "\n" + extra

        history = examples["history"][row] if has_history else None
        system = examples["system"][row] if has_system else None
        yield query, response, history, system
|
27 |
+
|
28 |
+
|
29 |
+
def infer_max_len(source_len: int, target_len: int, data_args: "DataArguments") -> Tuple[int, int]:
    r"""
    Splits the `cutoff_len` token budget between the source and the target.

    The target receives a share proportional to its length, but never less than
    `data_args.reserved_label_len`; the source receives whatever is left.

    Args:
        source_len: number of source (prompt) tokens.
        target_len: number of target (response) tokens.
        data_args: object providing `cutoff_len` and `reserved_label_len`.

    Returns:
        A `(max_source_len, max_target_len)` tuple.
    """
    total_len = source_len + target_len
    if total_len > 0:
        max_target_len = int(data_args.cutoff_len * (target_len / total_len))
    else:
        # both sides are empty: avoid dividing by zero, fall back to the reserved length
        max_target_len = 0

    max_target_len = max(max_target_len, data_args.reserved_label_len)
    max_source_len = data_args.cutoff_len - max_target_len
    return max_source_len, max_target_len
|
34 |
+
|
35 |
+
|
36 |
+
def preprocess_dataset(
    dataset: Union["Dataset", "IterableDataset"],
    tokenizer: "PreTrainedTokenizer",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    stage: Literal["pt", "sft", "rm", "ppo"]
) -> Union["Dataset", "IterableDataset"]:
    r"""
    Tokenizes `dataset` with the preprocessing function that matches `stage`.

    Dispatch:
        - "pt": texts are concatenated and cut into blocks of `cutoff_len` tokens.
        - "sft" without `predict_with_generate`: supervised examples (packed into
          blocks when `data_args.sft_packing` is set).
        - "rm": pairwise (prompt, chosen, rejected) examples.
        - anything else: unsupervised (prompt ids as inputs, response ids as labels).

    Args:
        dataset: dataset with "prompt"/"response" (and optionally "query", "history", "system") columns.
        tokenizer: tokenizer used to encode the text; it may be patched by the template lookup.
        data_args: data arguments (template, cutoff_len, cache_path, streaming, ...).
        training_args: training arguments (main_process_first, should_save, should_log, ...).
        stage: training stage selecting the preprocessing function.

    Returns:
        The tokenized dataset, or `dataset` itself when `data_args.cache_path` already exists.

    Raises:
        ValueError: if `train_on_prompt` is requested with a template using `efficient_eos`.
        RuntimeError: if the processed dataset is empty when an example should be logged.
    """
    template = get_template_and_fix_tokenizer(data_args.template, tokenizer)

    if data_args.cache_path is not None and os.path.exists(data_args.cache_path):
        return dataset # already preprocessed

    # the template is None when no template name is given (pre-training), so guard the attribute access
    if data_args.train_on_prompt and template is not None and template.efficient_eos:
        raise ValueError("Current template does not support `train_on_prompt`.")

    def preprocess_pretrain_dataset(examples: Dict[str, List[Any]]) -> Dict[str, List[List[int]]]:
        # build grouped texts with format `X1 X2 X3 ...`
        if isinstance(getattr(tokenizer, "tokenizer", None), tiktoken.Encoding): # for tiktoken tokenizer (Qwen)
            kwargs = dict(allowed_special="all")
        else:
            kwargs = dict(add_special_tokens=True)

        has_eos_flag = hasattr(tokenizer, "add_eos_token") # for LLaMA tokenizer
        if has_eos_flag:
            add_eos_token_flag = getattr(tokenizer, "add_eos_token")
            setattr(tokenizer, "add_eos_token", True)

        try:
            tokenized_examples = tokenizer(examples["prompt"], **kwargs)
        finally:
            # make sure the saved tokenizer is the same as the original one, even if tokenization fails
            if has_eos_flag:
                setattr(tokenizer, "add_eos_token", add_eos_token_flag)

        concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()}
        total_length = len(concatenated_examples[list(concatenated_examples.keys())[0]])
        block_size = data_args.cutoff_len
        # we drop the small remainder, and if the total_length < block_size, we exclude this batch
        total_length = (total_length // block_size) * block_size
        # split by chunks of cutoff_len
        result = {
            k: [t[i: i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
        return result

    def preprocess_supervised_dataset(examples: Dict[str, List[Any]]) -> Dict[str, List[List[int]]]:
        # build inputs with format `<bos> X Y <eos>` and labels with format `<ignore> ... <ignore> Y <eos>`
        # for multiturn examples, we only mask the prompt part in each prompt-response pair.
        model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}

        for query, response, history, system in construct_example(examples):
            # skip rows whose prompt or response is missing, empty or not a string
            if not (isinstance(query, str) and isinstance(response, str) and query != "" and response != ""):
                continue

            input_ids, labels = [], []
            for turn_idx, (source_ids, target_ids) in enumerate(template.encode_multiturn(
                tokenizer, query, response, history, system
            )):
                # truncate each turn so that source and target share the cutoff budget
                source_len, target_len = len(source_ids), len(target_ids)
                max_source_len, max_target_len = infer_max_len(source_len, target_len, data_args)
                if source_len > max_source_len:
                    source_ids = source_ids[:max_source_len]
                if target_len > max_target_len:
                    target_ids = target_ids[:max_target_len]

                if data_args.train_on_prompt:
                    source_mask = source_ids
                elif turn_idx != 0 and template.efficient_eos:
                    # the first source position of a later turn is labelled with eos, the rest is ignored
                    source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
                else:
                    source_mask = [IGNORE_INDEX] * len(source_ids)

                input_ids += source_ids + target_ids
                labels += source_mask + target_ids

            if template.efficient_eos:
                input_ids += [tokenizer.eos_token_id]
                labels += [tokenizer.eos_token_id]

            if len(input_ids) > data_args.cutoff_len:
                input_ids = input_ids[:data_args.cutoff_len]
                labels = labels[:data_args.cutoff_len]

            model_inputs["input_ids"].append(input_ids)
            model_inputs["attention_mask"].append([1] * len(input_ids))
            model_inputs["labels"].append(labels)

        return model_inputs

    def preprocess_packed_supervised_dataset(examples: Dict[str, List[Any]]) -> Dict[str, List[List[int]]]:
        # build inputs with format `<bos> X1 Y1 <eos> <bos> X2 Y2 <eos>`
        # and labels with format `<ignore> ... <ignore> Y1 <eos> <ignore> ... <ignore> Y2 <eos>`
        model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
        input_ids, labels = [], []
        for query, response, history, system in construct_example(examples):
            if not (isinstance(query, str) and isinstance(response, str) and query != "" and response != ""):
                continue

            for turn_idx, (source_ids, target_ids) in enumerate(template.encode_multiturn(
                tokenizer, query, response, history, system
            )):
                if data_args.train_on_prompt:
                    source_mask = source_ids
                elif turn_idx != 0 and template.efficient_eos:
                    source_mask = [tokenizer.eos_token_id] + [IGNORE_INDEX] * (len(source_ids) - 1)
                else:
                    source_mask = [IGNORE_INDEX] * len(source_ids)
                input_ids += source_ids + target_ids
                labels += source_mask + target_ids

        if template.efficient_eos:
            input_ids += [tokenizer.eos_token_id]
            labels += [tokenizer.eos_token_id]

        total_length = len(input_ids)
        block_size = data_args.cutoff_len
        # we drop the small remainder, and if the total_length < block_size, we exclude this batch
        total_length = (total_length // block_size) * block_size
        # split by chunks of cutoff_len
        for i in range(0, total_length, block_size):
            model_inputs["input_ids"].append(input_ids[i: i + block_size])
            model_inputs["attention_mask"].append([1] * block_size)
            model_inputs["labels"].append(labels[i: i + block_size])

        return model_inputs

    def preprocess_unsupervised_dataset(examples: Dict[str, List[Any]]) -> Dict[str, List[List[int]]]:
        # build inputs with format `<bos> X` and labels with format `Y <eos>`
        model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}

        for query, response, history, system in construct_example(examples):
            if not (isinstance(query, str) and query != ""):
                continue

            input_ids, labels = template.encode_oneturn(tokenizer, query, response, history, system)

            if template.efficient_eos:
                labels += [tokenizer.eos_token_id]

            if len(input_ids) > data_args.cutoff_len:
                input_ids = input_ids[:data_args.cutoff_len]
            if len(labels) > data_args.cutoff_len:
                labels = labels[:data_args.cutoff_len]

            model_inputs["input_ids"].append(input_ids)
            model_inputs["attention_mask"].append([1] * len(input_ids))
            model_inputs["labels"].append(labels)

        return model_inputs

    def preprocess_pairwise_dataset(examples: Dict[str, List[Any]]) -> Dict[str, List[List[int]]]:
        # build input pairs with format `<bos> X`, `Y1 <eos>` and `Y2 <eos>`
        model_inputs = {"prompt_ids": [], "chosen_ids": [], "rejected_ids": []}
        for query, response, history, system in construct_example(examples):
            # the response must be a list holding at least the chosen and the rejected answer
            if not (isinstance(query, str) and isinstance(response, list) and query != "" and len(response) > 1):
                continue

            prompt_ids, chosen_ids = template.encode_oneturn(tokenizer, query, response[0], history, system)
            _, rejected_ids = template.encode_oneturn(tokenizer, query, response[1], history, system)

            if template.efficient_eos:
                chosen_ids += [tokenizer.eos_token_id]
                rejected_ids += [tokenizer.eos_token_id]

            # the longer of the two answers decides the target budget
            source_len, target_len = len(prompt_ids), max(len(chosen_ids), len(rejected_ids))
            max_source_len, max_target_len = infer_max_len(source_len, target_len, data_args)
            if source_len > max_source_len:
                prompt_ids = prompt_ids[:max_source_len]
            if target_len > max_target_len:
                chosen_ids = chosen_ids[:max_target_len]
                rejected_ids = rejected_ids[:max_target_len]

            model_inputs["prompt_ids"].append(prompt_ids)
            model_inputs["chosen_ids"].append(chosen_ids)
            model_inputs["rejected_ids"].append(rejected_ids)

        return model_inputs

    def print_supervised_dataset_example(example: Dict[str, List[int]]) -> None:
        print("input_ids:\n{}".format(example["input_ids"]))
        print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
        print("label_ids:\n{}".format(example["labels"]))
        print("labels:\n{}".format(
            tokenizer.decode(list(filter(lambda x: x != IGNORE_INDEX, example["labels"])), skip_special_tokens=False)
        ))

    def print_pairwise_dataset_example(example: Dict[str, List[int]]) -> None:
        print("prompt_ids:\n{}".format(example["prompt_ids"]))
        print("prompt:\n{}".format(tokenizer.decode(example["prompt_ids"], skip_special_tokens=False)))
        print("chosen_ids:\n{}".format(example["chosen_ids"]))
        print("chosen:\n{}".format(tokenizer.decode(example["chosen_ids"], skip_special_tokens=False)))
        print("rejected_ids:\n{}".format(example["rejected_ids"]))
        print("rejected:\n{}".format(tokenizer.decode(example["rejected_ids"], skip_special_tokens=False)))

    def print_unsupervised_dataset_example(example: Dict[str, List[int]]) -> None:
        print("input_ids:\n{}".format(example["input_ids"]))
        print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))

    if stage == "pt":
        preprocess_func = preprocess_pretrain_dataset
        print_function = print_unsupervised_dataset_example
    elif stage == "sft" and not training_args.predict_with_generate:
        preprocess_func = preprocess_packed_supervised_dataset if data_args.sft_packing else preprocess_supervised_dataset
        print_function = print_supervised_dataset_example
    elif stage == "rm":
        preprocess_func = preprocess_pairwise_dataset
        print_function = print_pairwise_dataset_example
    else:
        preprocess_func = preprocess_unsupervised_dataset
        print_function = print_unsupervised_dataset_example

    with training_args.main_process_first(desc="dataset map pre-processing"):
        # all original columns are dropped; only the tokenized columns remain
        column_names = list(next(iter(dataset)).keys())
        kwargs = {}
        if not data_args.streaming:
            kwargs = dict(
                num_proc=data_args.preprocessing_num_workers,
                load_from_cache_file=(not data_args.overwrite_cache),
                desc="Running tokenizer on dataset"
            )

        dataset = dataset.map(
            preprocess_func,
            batched=True,
            remove_columns=column_names,
            **kwargs
        )

        if data_args.cache_path is not None and not os.path.exists(data_args.cache_path):
            if training_args.should_save:
                dataset.save_to_disk(data_args.cache_path)
                logger.info("Dataset cache saved at {}.".format(data_args.cache_path))

        if training_args.should_log:
            try:
                print_function(next(iter(dataset)))
            except StopIteration:
                raise RuntimeError("Empty dataset!")

        return dataset
|
LLM-Detector-V7-11w/src/llmtuner/data/template.py
ADDED
@@ -0,0 +1,815 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tiktoken
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
|
4 |
+
|
5 |
+
from llmtuner.extras.logging import get_logger
|
6 |
+
|
7 |
+
if TYPE_CHECKING:
|
8 |
+
from transformers import PreTrainedTokenizer
|
9 |
+
|
10 |
+
|
11 |
+
logger = get_logger(__name__)
|
12 |
+
|
13 |
+
|
14 |
+
@dataclass
class Template:
    r"""
    Describes a chat prompt layout and encodes conversations into token ids.
    """

    prefix: List[Union[str, Dict[str, str]]]
    prompt: List[Union[str, Dict[str, str]]]
    system: str
    sep: List[Union[str, Dict[str, str]]]
    stop_words: List[str]
    use_history: bool
    efficient_eos: bool
    replace_eos: bool

    def encode_oneturn(
        self,
        tokenizer: "PreTrainedTokenizer",
        query: str,
        resp: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None
    ) -> Tuple[List[int], List[int]]:
        r"""
        Returns one (prompt ids, answer ids) pair: every earlier turn plus the final
        query form the prompt, the final response forms the answer.
        """
        system, history = self._format(query, resp, history, system)
        encoded_pairs = self._encode(tokenizer, system, history)
        final_query_ids, answer_ids = encoded_pairs[-1][0], encoded_pairs[-1][1]

        prompt_ids = []
        for past_query_ids, past_resp_ids in encoded_pairs[:-1]:
            prompt_ids.extend(past_query_ids)
            prompt_ids.extend(past_resp_ids)
        prompt_ids.extend(final_query_ids)
        return prompt_ids, answer_ids

    def encode_multiturn(
        self,
        tokenizer: "PreTrainedTokenizer",
        query: str,
        resp: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None
    ) -> List[Tuple[List[int], List[int]]]:
        r"""
        Returns one (prompt ids, response ids) pair per conversation turn.
        """
        system, history = self._format(query, resp, history, system)
        return self._encode(tokenizer, system, history)

    def _format(
        self,
        query: str,
        resp: str,
        history: Optional[List[Tuple[str, str]]] = None,
        system: Optional[str] = None
    ) -> Tuple[str, List[Tuple[str, str]]]:
        r"""
        Normalizes the inputs: falls back to the template's system text and appends
        the current (query, resp) turn to the (possibly discarded) history.
        """
        chosen_system = system or self.system  # a provided system text wins
        earlier_turns = history if (self.use_history and history) else []
        return chosen_system, earlier_turns + [(query, resp)]

    def _get_special_ids(
        self,
        tokenizer: "PreTrainedTokenizer"
    ) -> Tuple[List[int], List[int]]:
        r"""
        Returns the bos and eos id lists to wrap each turn with.

        Raises a ValueError if the tokenizer has no eos token.
        """
        # baichuan, gpt2, qwen, yi models have no bos token
        wants_bos = tokenizer.bos_token_id is not None and getattr(tokenizer, "add_bos_token", True)
        bos_ids = [tokenizer.bos_token_id] if wants_bos else []

        if tokenizer.eos_token_id is None:
            raise ValueError("EOS token is required.")

        # with efficient_eos no eos is attached to individual turns
        eos_ids = [] if self.efficient_eos else [tokenizer.eos_token_id]
        return bos_ids, eos_ids

    def _encode(
        self,
        tokenizer: "PreTrainedTokenizer",
        system: str,
        history: List[Tuple[str, str]]
    ) -> List[Tuple[List[int], List[int]]]:
        r"""
        Encodes formatted inputs to pairs of token ids.
        Turn 0: bos + prefix + sep + query    resp + eos
        Turn t: sep + bos + query             resp + eos
        """
        bos_ids, eos_ids = self._get_special_ids(tokenizer)
        sep_ids = self._convert_inputs_to_ids(tokenizer, context=self.sep)

        pairs = []
        for turn_no, (query, resp) in enumerate(history, start=1):
            if turn_no > 1:
                lead_ids = sep_ids + bos_ids
            else:
                lead_ids = self._convert_inputs_to_ids(tokenizer, context=self.prefix, system=system)
                # the separator is only emitted when there is a prefix to separate from
                lead_ids = bos_ids + lead_ids + sep_ids if len(lead_ids) != 0 else bos_ids

            query_ids = self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=query, idx=str(turn_no))
            resp_ids = self._convert_inputs_to_ids(tokenizer, context=[resp])
            pairs.append((lead_ids + query_ids, resp_ids + eos_ids))
        return pairs

    def _convert_inputs_to_ids(
        self,
        tokenizer: "PreTrainedTokenizer",
        context: List[Union[str, Dict[str, str]]],
        system: Optional[str] = None,
        query: Optional[str] = None,
        idx: Optional[str] = None
    ) -> List[int]:
        r"""
        Converts context to token ids.

        String elements get their first `{{system}}`, `{{query}}` and `{{idx}}` placeholder
        filled in (when a value is given) and are then tokenized; dict elements are looked
        up as a single token through their "token" entry.
        """
        if isinstance(getattr(tokenizer, "tokenizer", None), tiktoken.Encoding): # for tiktoken tokenizer (Qwen)
            encode_kwargs = {"allowed_special": "all"}
        else:
            encode_kwargs = {"add_special_tokens": False}

        substitutions = (("{{system}}", system), ("{{query}}", query), ("{{idx}}", idx))

        token_ids = []
        for piece in context:
            if isinstance(piece, dict):
                token_ids.append(tokenizer.convert_tokens_to_ids(piece.get("token")))
            elif isinstance(piece, str):
                for placeholder, value in substitutions:
                    if value is not None:
                        piece = piece.replace(placeholder, value, 1)
                if piece:  # nothing to tokenize for an empty string
                    token_ids.extend(tokenizer.encode(piece, **encode_kwargs))
            else:
                raise ValueError("Input must be string or dict[str, str], got {}".format(type(piece)))

        return token_ids
|
154 |
+
|
155 |
+
|
156 |
+
@dataclass
class Llama2Template(Template):
    r"""
    Template variant for llama2-style prompts: no separator between turns, and the
    system prefix is merged into the first query instead of being encoded on its own.
    """

    def _encode(
        self,
        tokenizer: "PreTrainedTokenizer",
        system: str,
        history: List[Tuple[str, str]]
    ) -> List[Tuple[List[int], List[int]]]:
        r"""
        Encodes formatted inputs to pairs of token ids.
        Turn 0: bos + prefix + query    resp + eos
        Turn t: bos + query             resp + eos
        """
        bos_ids, eos_ids = self._get_special_ids(tokenizer)

        pairs = []
        for turn_idx, (query, resp) in enumerate(history):
            # llama2 template has no sep_ids; the filled-in prefix is prepended to the first query
            user_text = self.prefix[0].replace("{{system}}", system) + query if turn_idx == 0 else query
            prompt_ids = bos_ids + self._convert_inputs_to_ids(tokenizer, context=self.prompt, query=user_text)
            answer_ids = self._convert_inputs_to_ids(tokenizer, context=[resp]) + eos_ids
            pairs.append((prompt_ids, answer_ids))
        return pairs
|
179 |
+
|
180 |
+
|
181 |
+
# Registry of templates keyed by template name; entries are added by `register_template`.
templates: Dict[str, Template] = {}
|
182 |
+
|
183 |
+
|
184 |
+
def register_template(
    name: str,
    prefix: List[Union[str, Dict[str, str]]],
    prompt: List[Union[str, Dict[str, str]]],
    system: str,
    sep: List[Union[str, Dict[str, str]]],
    stop_words: Optional[List[str]] = None,
    use_history: Optional[bool] = True,
    efficient_eos: Optional[bool] = False,
    replace_eos: Optional[bool] = False
) -> None:
    r"""
    Builds a template and stores it in the `templates` registry under `name`.

    Names starting with "llama2" are built as `Llama2Template`, all others as `Template`.

    Args:
        name: registry key of the template.
        prefix: elements encoded before the first turn (may contain `{{system}}`).
        prompt: elements encoded for each query (may contain `{{query}}` / `{{idx}}`).
        system: default system text.
        sep: elements encoded between turns.
        stop_words: extra stop words; omitted or None means no stop words.
        use_history: whether earlier turns are kept when encoding.
        efficient_eos: whether eos is left off individual turns.
        replace_eos: whether the first stop word replaces the tokenizer's eos token.
    """
    # a fresh list per template: a shared `[]` default would be aliased by every template
    if stop_words is None:
        stop_words = []

    template_class = Llama2Template if name.startswith("llama2") else Template
    templates[name] = template_class(
        prefix=prefix,
        prompt=prompt,
        system=system,
        sep=sep,
        stop_words=stop_words,
        use_history=use_history,
        efficient_eos=efficient_eos,
        replace_eos=replace_eos
    )
|
206 |
+
|
207 |
+
|
208 |
+
def get_template_and_fix_tokenizer(
    name: str,
    tokenizer: "PreTrainedTokenizer"
) -> Template:
    r"""
    Looks up the template registered under `name` and patches `tokenizer` to match it.

    A missing eos token is set to "<|endoftext|>" and a missing pad token to the eos token.
    With `replace_eos`, the template's first stop word becomes the eos token; the remaining
    stop words are added as additional special tokens.

    Returns None when `name` is None (pre-training). Raises an AssertionError for an
    unknown template name and a ValueError when `replace_eos` is set without stop words.
    """
    if tokenizer.eos_token_id is None:
        tokenizer.eos_token = "<|endoftext|>"
        logger.info("Add eos token: {}".format(tokenizer.eos_token))

    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token
        logger.info("Add pad token: {}".format(tokenizer.pad_token))

    if name is None: # for pre-training
        return None

    template = templates.get(name)
    assert template is not None, "Template {} does not exist.".format(name)

    extra_stop_words = template.stop_words
    if template.replace_eos:
        if not extra_stop_words:
            raise ValueError("Stop words are required to replace the EOS token.")

        # the first stop word takes over as eos, the others stay plain stop words
        tokenizer.eos_token, extra_stop_words = extra_stop_words[0], extra_stop_words[1:]
        logger.info("Replace eos token: {}".format(tokenizer.eos_token))

    if extra_stop_words:
        tokenizer.add_special_tokens(
            dict(additional_special_tokens=extra_stop_words),
            replace_additional_special_tokens=False
        )
        logger.info("Add {} to stop words.".format(",".join(extra_stop_words)))

    return template
|
243 |
+
|
244 |
+
|
245 |
+
# "alpaca": system text as prefix, "### Instruction:" / "### Response:" markers around the query,
# blank-line separator; no stop words.
register_template(
    name="alpaca",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "### Instruction:\n{{query}}\n\n### Response:\n"
    ],
    system=(
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
    ),
    sep=[
        "\n\n"
    ]
)
|
261 |
+
|
262 |
+
|
263 |
+
# "aquila": "Human: ...###Assistant:" prompt with "###" as separator and "</s>" as stop word.
register_template(
    name="aquila",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "Human: {{query}}###Assistant:"
    ],
    system=(
        "A chat between a curious human and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the human's questions."
    ),
    sep=[
        "###"
    ],
    stop_words=[
        "</s>"
    ],
    efficient_eos=True
)
|
283 |
+
|
284 |
+
|
285 |
+
# "baichuan": the query is wrapped between the reserved user/assistant tokens 102 and 103;
# empty system text and no separator.
register_template(
    name="baichuan",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        {"token": "<reserved_102>"}, # user token
        "{{query}}",
        {"token": "<reserved_103>"} # assistant token
    ],
    system="",
    sep=[],
    efficient_eos=True
)
|
299 |
+
|
300 |
+
|
301 |
+
# "baichuan2": same layout as "baichuan" but with the reserved tokens 106 (user) and 107 (assistant).
register_template(
    name="baichuan2",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        {"token": "<reserved_106>"}, # user token
        "{{query}}",
        {"token": "<reserved_107>"} # assistant token
    ],
    system="",
    sep=[],
    efficient_eos=True
)
|
315 |
+
|
316 |
+
|
317 |
+
# "belle": "Human: ... Belle: " prompt, empty system text, blank-line separator.
register_template(
    name="belle",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "Human: {{query}}\n\nBelle: "
    ],
    system="",
    sep=[
        "\n\n"
    ]
)
|
330 |
+
|
331 |
+
|
332 |
+
# "bluelm": the query sits between the "[|Human|]:" and "[|AI|]:" token entries; no separator.
register_template(
    name="bluelm",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        {"token": "[|Human|]:"},
        "{{query}}",
        {"token": "[|AI|]:"}
    ],
    system="",
    sep=[]
)
|
345 |
+
|
346 |
+
|
347 |
+
# "chatglm2": prefix starts with the "[gMASK]" and "sop" token entries; the prompt is a
# numbered round ("[Round {{idx}}]") with Chinese question/answer markers.
register_template(
    name="chatglm2",
    prefix=[
        {"token": "[gMASK]"},
        {"token": "sop"},
        "{{system}}"
    ],
    prompt=[
        "[Round {{idx}}]\n\n问:{{query}}\n\n答:"
    ],
    system="",
    sep=[
        "\n\n"
    ],
    efficient_eos=True
)
|
363 |
+
|
364 |
+
|
365 |
+
# "chatglm3": role-token layout ("<|system|>", "<|user|>", "<|assistant|>") with
# "<|user|>" and "<|observation|>" registered as stop words.
register_template(
    name="chatglm3",
    prefix=[
        {"token": "[gMASK]"},
        {"token": "sop"},
        {"token": "<|system|>"},
        "\n",
        "{{system}}"
    ],
    prompt=[
        {"token": "<|user|>"},
        "\n",
        "{{query}}",
        {"token": "<|assistant|>"},
        "\n" # add an extra newline to avoid error in ChatGLM's process_response method
    ],
    system=(
        "You are ChatGLM3, a large language model trained by Zhipu.AI. "
        "Follow the user's instructions carefully. Respond using markdown."
    ),
    sep=[],
    stop_words=[
        "<|user|>",
        "<|observation|>"
    ],
    efficient_eos=True
)
|
392 |
+
|
393 |
+
|
394 |
+
# "chatglm3_raw": identical to "chatglm3" except that the prompt has no trailing "\n"
# after the "<|assistant|>" token entry.
register_template(
    name="chatglm3_raw", # the raw template for tool tuning
    prefix=[
        {"token": "[gMASK]"},
        {"token": "sop"},
        {"token": "<|system|>"},
        "\n",
        "{{system}}"
    ],
    prompt=[
        {"token": "<|user|>"},
        "\n",
        "{{query}}",
        {"token": "<|assistant|>"}
    ],
    system=(
        "You are ChatGLM3, a large language model trained by Zhipu.AI. "
        "Follow the user's instructions carefully. Respond using markdown."
    ),
    sep=[],
    stop_words=[
        "<|user|>",
        "<|observation|>"
    ],
    efficient_eos=True
)
|
420 |
+
|
421 |
+
|
422 |
+
# "codegeex2": "[gMASK]" / "sop" prefix tokens followed by the bare query; no separator.
register_template(
    name="codegeex2",
    prefix=[
        {"token": "[gMASK]"},
        {"token": "sop"},
        "{{system}}"
    ],
    prompt=[
        "{{query}}"
    ],
    system="",
    sep=[]
)
|
435 |
+
|
436 |
+
|
437 |
+
# "deepseek": plain "User: ... Assistant:" prompt, empty system text, no separator.
register_template(
    name="deepseek",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "User: {{query}}\n\nAssistant:"
    ],
    system="",
    sep=[]
)
|
448 |
+
|
449 |
+
|
450 |
+
# "deepseekcoder": instruction/response prompt; the separator contains the "<|EOT|>" token
# entry, which is also the only stop word.
register_template(
    name="deepseekcoder",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "### Instruction:\n{{query}}\n### Response:\n"
    ],
    system=(
        "You are an AI programming assistant, utilizing the Deepseek Coder model, "
        "developed by Deepseek Company, and you only answer questions related to computer science. "
        "For politically sensitive questions, security and privacy issues, "
        "and other non-computer science questions, you will refuse to answer\n"
    ),
    sep=[
        "\n",
        {"token": "<|EOT|>"},
        "\n"
    ],
    stop_words=[
        "<|EOT|>"
    ],
    efficient_eos=True
)
|
474 |
+
|
475 |
+
|
476 |
+
# "default": generic "Human: ... Assistant:" prompt with a newline separator.
register_template(
    name="default",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "Human: {{query}}\nAssistant:"
    ],
    system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    sep=[
        "\n"
    ]
)
|
492 |
+
|
493 |
+
|
494 |
+
# "falcon": "User: ... Falcon:" prompt, empty system text, newline separator.
register_template(
    name="falcon",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "User: {{query}}\nFalcon:"
    ],
    system="",
    sep=[
        "\n"
    ],
    efficient_eos=True
)
|
508 |
+
|
509 |
+
|
510 |
+
# "intern": user text is closed by the "<eoh>" token entry; the separator starts with the
# "<eoa>" token entry, which is also the stop word.
register_template(
    name="intern",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "<|User|>:{{query}}",
        {"token": "<eoh>"},
        "\n<|Bot|>:"
    ],
    system="",
    sep=[
        {"token": "<eoa>"},
        "\n"
    ],
    stop_words=[
        "<eoa>"
    ],
    efficient_eos=True
)
|
530 |
+
|
531 |
+
|
532 |
+
# "llama2": system text wrapped in <<SYS>> markers, query wrapped in [INST] markers.
# Because the name starts with "llama2", register_template() builds a Llama2Template.
register_template(
    name="llama2",
    prefix=[
        "<<SYS>>\n{{system}}\n<</SYS>>\n\n"
    ],
    prompt=[
        "[INST] {{query}} [/INST]"
    ],
    system=(
        "You are a helpful, respectful and honest assistant. "
        "Always answer as helpfully as possible, while being safe. "
        "Your answers should not include any harmful, unethical, "
        "racist, sexist, toxic, dangerous, or illegal content. "
        "Please ensure that your responses are socially unbiased and positive in nature.\n\n"
        "If a question does not make any sense, or is not factually coherent, "
        "explain why instead of answering something not correct. "
        "If you don't know the answer to a question, please don't share false information."
    ),
    sep=[]
)
|
552 |
+
|
553 |
+
|
554 |
+
# "llama2_zh": same markers as "llama2" with a short bilingual (English/Chinese) system text.
# The "llama2" name prefix also selects Llama2Template in register_template().
register_template(
    name="llama2_zh",
    prefix=[
        "<<SYS>>\n{{system}}\n<</SYS>>\n\n"
    ],
    prompt=[
        "[INST] {{query}} [/INST]"
    ],
    system="You are a helpful assistant. 你是一个乐于助人的助手。",
    sep=[]
)
|
565 |
+
|
566 |
+
|
567 |
+
# "mistral": query wrapped in [INST] markers, empty system text, no separator.
register_template(
    name="mistral",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "[INST] {{query}} [/INST]"
    ],
    system="",
    sep=[]
)
|
578 |
+
|
579 |
+
|
580 |
+
# "openchat": "GPT4 Correct User/Assistant" prompt; "<|end_of_turn|>" is used inside the
# prompt, as the separator and as the stop word.
register_template(
    name="openchat",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "GPT4 Correct User: {{query}}",
        {"token": "<|end_of_turn|>"},
        "GPT4 Correct Assistant:"
    ],
    system="",
    sep=[
        {"token": "<|end_of_turn|>"}
    ],
    stop_words=[
        "<|end_of_turn|>"
    ],
    efficient_eos=True
)
|
599 |
+
|
600 |
+
|
601 |
+
# "qwen": <|im_start|>/<|im_end|> chat markup. With replace_eos=True,
# get_template_and_fix_tokenizer() makes the first stop word ("<|im_end|>") the tokenizer's EOS token.
register_template(
    name="qwen",
    prefix=[
        "<|im_start|>system\n{{system}}<|im_end|>"
    ],
    prompt=[
        "<|im_start|>user\n{{query}}<|im_end|>\n<|im_start|>assistant\n"
    ],
    system="You are a helpful assistant.",
    sep=[
        "\n"
    ],
    stop_words=[
        "<|im_end|>"
    ],
    replace_eos=True
)
|
618 |
+
|
619 |
+
|
620 |
+
# "solar": "### User:" / "### Assistant:" prompt, empty system text, no separator.
register_template(
    name="solar",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "### User:\n{{query}}\n\n### Assistant:\n"
    ],
    system="",
    sep=[]
)
|
631 |
+
|
632 |
+
|
633 |
+
# "starchat": role-token layout where each turn is closed by the "<|end|>" token entry,
# which is also the stop word.
register_template(
    name="starchat",
    prefix=[
        {"token": "<|system|>"},
        "\n{{system}}",
    ],
    prompt=[
        {"token": "<|user|>"},
        "\n{{query}}",
        {"token": "<|end|>"},
        "\n",
        {"token": "<|assistant|>"}
    ],
    system="",
    sep=[
        {"token": "<|end|>"},
        "\n"
    ],
    stop_words=[
        "<|end|>"
    ],
    efficient_eos=True
)
|
656 |
+
|
657 |
+
|
658 |
+
# "vanilla": the bare query with no prefix, system text or separator; the only template
# in this list registered with use_history=False.
register_template(
    name="vanilla",
    prefix=[],
    prompt=[
        "{{query}}"
    ],
    system="",
    sep=[],
    use_history=False
)
|
668 |
+
|
669 |
+
|
670 |
+
# "vicuna": "USER: ... ASSISTANT:" prompt with the generic assistant system text; no separator.
register_template(
    name="vicuna",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "USER: {{query}} ASSISTANT:"
    ],
    system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    sep=[]
)
|
684 |
+
|
685 |
+
|
686 |
+
# "xuanyuan": "Human: ... Assistant:" prompt with a Chinese system text that describes the
# Human/Assistant dialogue format and tells the assistant to decline unsafe or sensitive topics.
register_template(
    name="xuanyuan",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "Human: {{query}} Assistant:"
    ],
    system=(
        "以下是用户和人工智能助手之间的对话。用户以Human开头,人工智能助手以Assistant开头,"
        "会对人类提出的问题给出有帮助、高质量、详细和礼貌的回答,并且总是拒绝参与与不道德、"
        "不安全、有争议、政治敏感等相关的话题、问题和指示。\n"
    ),
    sep=[]
)
|
701 |
+
|
702 |
+
|
703 |
+
# "xverse": "Human: ... Assistant: " prompt, empty system text, no separator.
register_template(
    name="xverse",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "Human: {{query}}\n\nAssistant: "
    ],
    system="",
    sep=[]
)
|
714 |
+
|
715 |
+
|
716 |
+
# "yayi": role-token layout ("<|System|>", "<|Human|>", "<|YaYi|>"), blank-line separator
# and "<|End|>" as stop word.
register_template(
    name="yayi",
    prefix=[
        {"token": "<|System|>"},
        ":\n{{system}}"
    ],
    prompt=[
        {"token": "<|Human|>"},
        ":\n{{query}}\n\n",
        {"token": "<|YaYi|>"},
        ":"
    ],
    system=(
        "You are a helpful, respectful and honest assistant named YaYi "
        "developed by Beijing Wenge Technology Co.,Ltd. "
        "Always answer as helpfully as possible, while being safe. "
        "Your answers should not include any harmful, unethical, "
        "racist, sexist, toxic, dangerous, or illegal content. "
        "Please ensure that your responses are socially unbiased and positive in nature.\n\n"
        "If a question does not make any sense, or is not factually coherent, "
        "explain why instead of answering something not correct. "
        "If you don't know the answer to a question, please don't share false information."
    ),
    sep=[
        "\n\n"
    ],
    stop_words=[
        "<|End|>"
    ],
    replace_eos=False
) if False else register_template(
    name="yayi",
    prefix=[
        {"token": "<|System|>"},
        ":\n{{system}}"
    ],
    prompt=[
        {"token": "<|Human|>"},
        ":\n{{query}}\n\n",
        {"token": "<|YaYi|>"},
        ":"
    ],
    system=(
        "You are a helpful, respectful and honest assistant named YaYi "
        "developed by Beijing Wenge Technology Co.,Ltd. "
        "Always answer as helpfully as possible, while being safe. "
        "Your answers should not include any harmful, unethical, "
        "racist, sexist, toxic, dangerous, or illegal content. "
        "Please ensure that your responses are socially unbiased and positive in nature.\n\n"
        "If a question does not make any sense, or is not factually coherent, "
        "explain why instead of answering something not correct. "
        "If you don't know the answer to a question, please don't share false information."
    ),
    sep=[
        "\n\n"
    ],
    stop_words=[
        "<|End|>"
    ]
)
|
746 |
+
|
747 |
+
|
748 |
+
# "yi": <|im_start|>/<|im_end|> user/assistant markup with an empty system text. With
# replace_eos=True, get_template_and_fix_tokenizer() makes "<|im_end|>" the tokenizer's EOS token.
register_template(
    name="yi",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "<|im_start|>user\n{{query}}<|im_end|>\n<|im_start|>assistant\n"
    ],
    system="",
    sep=[
        "\n"
    ],
    stop_words=[
        "<|im_end|>"
    ],
    replace_eos=True
)
|
765 |
+
|
766 |
+
|
767 |
+
# "yuan": the query followed by the "<sep>" token entry. With replace_eos=True,
# get_template_and_fix_tokenizer() makes "<eod>" the tokenizer's EOS token.
register_template(
    name="yuan",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        "{{query}}",
        {"token": "<sep>"}
    ],
    system="",
    sep=[
        "\n"
    ],
    stop_words=[
        "<eod>"
    ],
    replace_eos=True
)
|
785 |
+
|
786 |
+
|
787 |
+
# "zephyr": "<|system|>" / "<|user|>" / "<|assistant|>" markup written as plain text with
# "</s>" terminators. NOTE(review): the default system text makes the model answer as a
# pirate; confirm this default is intended.
register_template(
    name="zephyr",
    prefix=[
        "<|system|>\n{{system}}</s>",
    ],
    prompt=[
        "<|user|>\n{{query}}</s><|assistant|>"
    ],
    system="You are a friendly chatbot who always responds in the style of a pirate",
    sep=[]
)
|
798 |
+
|
799 |
+
|
800 |
+
# "ziya": the query sits between the "<human>" and "<bot>" token entries; newline separator.
register_template(
    name="ziya",
    prefix=[
        "{{system}}"
    ],
    prompt=[
        {"token": "<human>"},
        ":{{query}}\n",
        {"token": "<bot>"},
        ":"
    ],
    system="",
    sep=[
        "\n"
    ]
)
|
LLM-Detector-V7-11w/src/llmtuner/data/utils.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import hashlib
|
2 |
+
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
3 |
+
|
4 |
+
from llmtuner.extras.logging import get_logger
|
5 |
+
|
6 |
+
if TYPE_CHECKING:
|
7 |
+
from datasets import Dataset, IterableDataset
|
8 |
+
from transformers import TrainingArguments
|
9 |
+
from llmtuner.hparams import DataArguments
|
10 |
+
|
11 |
+
|
12 |
+
# Module-level logger, created through the project's get_logger() helper.
logger = get_logger(__name__)
|
13 |
+
|
14 |
+
|
15 |
+
def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None:
    r"""
    Compares the SHA-1 digest of a single data file with an expected value (best effort).

    Nothing is raised on a failed check: a missing expected digest, a file list that does
    not contain exactly one path, and a digest mismatch are each reported as a warning.

    Args:
        data_files: paths of the data files; the check only runs for a one-element list.
        file_sha1: expected hexadecimal SHA-1 digest, or None when none is known.
    """
    if file_sha1 is None:
        logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
        return

    if len(data_files) != 1:
        logger.warning("Checksum failed: too many files.")
        return

    digest = hashlib.sha1()
    with open(data_files[0], "rb") as f:
        # Feed the hash in 1 MiB chunks so a large data file is never held in memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)

    if digest.hexdigest() != file_sha1:
        logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0]))
|
28 |
+
|
29 |
+
|
30 |
+
def split_dataset(
    dataset: Union["Dataset", "IterableDataset"],
    data_args: "DataArguments",
    training_args: "TrainingArguments"
) -> Dict[str, "Dataset"]:
    r"""
    Builds the keyword arguments that hand the train / eval datasets to a trainer.

    Returns:
        * training with ``val_size > 1e-6``: ``{"train_dataset": ..., "eval_dataset": ...}``
        * training without a validation split: ``{"train_dataset": ...}``
        * otherwise (do_eval or do_predict): ``{"eval_dataset": dataset}``

    In streaming mode ``val_size`` is truncated to an int and used as the number of
    examples to take; otherwise a value above 1 is truncated to an int and smaller
    values are passed to ``train_test_split`` unchanged.
    """
    if training_args.do_train:
        if data_args.val_size > 1e-6: # Split the dataset
            if data_args.streaming:
                # Shuffle BEFORE splitting. The shuffle used to run after take()/skip() and
                # its result was never returned, so both returned splits were unshuffled.
                dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
                val_set = dataset.take(int(data_args.val_size))
                train_set = dataset.skip(int(data_args.val_size))
                return {"train_dataset": train_set, "eval_dataset": val_set}
            else:
                val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size
                dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed)
                return {"train_dataset": dataset["train"], "eval_dataset": dataset["test"]}
        else:
            if data_args.streaming:
                dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
            return {"train_dataset": dataset}
    else: # do_eval or do_predict
        return {"eval_dataset": dataset}
|
LLM-Detector-V7-11w/src/llmtuner/eval/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.eval.evaluator import Evaluator
|
LLM-Detector-V7-11w/src/llmtuner/eval/evaluator.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py
|
2 |
+
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
import torch
|
6 |
+
import tiktoken
|
7 |
+
import numpy as np
|
8 |
+
from tqdm import tqdm, trange
|
9 |
+
from typing import Any, Dict, List, Optional
|
10 |
+
|
11 |
+
from datasets import load_dataset
|
12 |
+
from transformers.utils import cached_file
|
13 |
+
|
14 |
+
from llmtuner.data.template import get_template_and_fix_tokenizer
|
15 |
+
from llmtuner.eval.template import get_eval_template
|
16 |
+
from llmtuner.extras.constants import CHOICES, SUBJECTS
|
17 |
+
from llmtuner.model import dispatch_model, get_eval_args, load_model_and_tokenizer
|
18 |
+
|
19 |
+
|
20 |
+
class Evaluator:
    r"""
    Few-shot multiple-choice evaluator.

    For every question the model's logits at the last prompt token are restricted to the
    token ids of the answer letters in ``CHOICES``; the most likely letter is the prediction.
    Accuracy is accumulated per category and optionally written to ``eval_args.save_dir``.
    """

    def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
        r"""
        Parses the evaluation arguments, loads model and tokenizer, and prepares the templates.
        """
        self.model_args, self.data_args, self.eval_args, finetuning_args = get_eval_args(args)
        self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args)
        self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2
        self.model = dispatch_model(self.model)
        self.template = get_template_and_fix_tokenizer(self.data_args.template, self.tokenizer)
        self.eval_template = get_eval_template(self.eval_args.lang)
        # Token id of every answer letter, in CHOICES order.
        self.choice_inputs = self._encode_choices()

    def _encode_choices(self) -> List[int]:
        r"""
        Returns one token id per entry of CHOICES.

        Each letter is encoded after the eval template's prefix and only the last id of
        the encoding is kept.
        """
        if isinstance(getattr(self.tokenizer, "tokenizer", None), tiktoken.Encoding): # for tiktoken tokenizer (Qwen)
            kwargs = dict(allowed_special="all")
        else:
            kwargs = dict(add_special_tokens=False)

        return [self.tokenizer.encode(self.eval_template.prefix + ch, **kwargs)[-1] for ch in CHOICES]

    @torch.inference_mode()
    def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:
        r"""
        Predicts one answer letter per row of a padded batch.
        """
        logits = self.model(**batch_input).logits
        # Number of attended tokens per row; with right padding (set in __init__) index
        # ``length - 1`` is the last real token of that row.
        lengths = torch.sum(batch_input["attention_mask"], dim=-1)
        word_probs = torch.stack([logits[i, lengths[i] - 1] for i in range(len(lengths))], dim=0)
        # Softmax over the answer-letter columns only.
        choice_probs = torch.nn.functional.softmax(word_probs[:, self.choice_inputs], dim=-1).detach()
        # Column index 0, 1, 2, ... maps to "A", "B", "C", ...
        return [chr(ord("A") + offset.item()) for offset in torch.argmax(choice_probs, dim=-1)]

    def eval(self) -> None:
        r"""
        Runs the benchmark over every subject listed in the task's mapping.json and saves the results.
        """
        mapping = cached_file(
            path_or_repo_id = os.path.join(self.eval_args.task_dir, self.eval_args.task),
            filename="mapping.json",
            cache_dir=self.model_args.cache_dir,
            token=self.model_args.hf_hub_token
        )

        # mapping.json: subject key -> dict read below through its "name" and "category" entries.
        with open(mapping, "r", encoding="utf-8") as f:
            categorys: Dict[str, Dict[str, str]] = json.load(f)

        # NOTE(review): the updates below index this dict with each subject's category and
        # with "Average"; presumably SUBJECTS contains all of these keys - confirm.
        category_corrects = {subj: np.array([], dtype="bool") for subj in SUBJECTS}
        pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0)
        results = {}
        for subject in pbar:
            dataset = load_dataset(
                path=os.path.join(self.eval_args.task_dir, self.eval_args.task),
                name=subject,
                cache_dir=self.model_args.cache_dir,
                download_mode=self.eval_args.download_mode,
                token=self.model_args.hf_hub_token
            )
            pbar.set_postfix_str(categorys[subject]["name"])
            inputs, outputs, labels = [], [], []
            for i in trange(len(dataset[self.data_args.split]), desc="Formatting batches", position=1, leave=False):
                # A fresh few-shot support set is drawn for every question; no seed is passed to shuffle().
                support_set = dataset["train"].shuffle().select(range(min(self.eval_args.n_shot, len(dataset["train"]))))
                query, resp, history = self.eval_template.format_example(
                    target_data=dataset[self.data_args.split][i],
                    support_set=support_set,
                    subject_name=categorys[subject]["name"],
                    use_history=self.template.use_history
                )
                # Only the first element (the prompt ids) of the encoded pair is used.
                input_ids, _ = self.template.encode_oneturn(
                    tokenizer=self.tokenizer, query=query, resp=resp, history=history
                )
                inputs.append({"input_ids": input_ids, "attention_mask": [1] * len(input_ids)})
                labels.append(resp)

            for i in trange(0, len(inputs), self.eval_args.batch_size, desc="Predicting batches", position=1, leave=False):
                batch_input = self.tokenizer.pad(
                    inputs[i : i + self.eval_args.batch_size], return_attention_mask=True, return_tensors="pt"
                ).to(self.model.device)
                preds = self.batch_inference(batch_input)
                outputs += preds

            # Element-wise comparison of predicted and reference answers.
            corrects = (np.array(outputs) == np.array(labels))
            category_name = categorys[subject]["category"]
            category_corrects[category_name] = np.concatenate([category_corrects[category_name], corrects], axis=0)
            category_corrects["Average"] = np.concatenate([category_corrects["Average"], corrects], axis=0)
            results[subject] = {str(i): outputs[i] for i in range(len(outputs))}

        pbar.close()
        self._save_results(category_corrects, results)

    def _save_results(self, category_corrects: Dict[str, np.ndarray], results: Dict[str, Dict[str, str]]) -> None:
        r"""
        Prints the accuracy (in percent) of every non-empty category and, when
        ``eval_args.save_dir`` is set, writes results.json and results.log there.

        Raises:
            FileExistsError: if ``save_dir`` already exists (``exist_ok=False``).
        """
        score_info = "\n".join([
            "{:>15}: {:.2f}".format(category_name, 100 * np.mean(category_correct))
            for category_name, category_correct in category_corrects.items() if len(category_correct)
        ])
        print(score_info)
        if self.eval_args.save_dir is not None:
            # Refuses to reuse an existing directory; note this runs after the evaluation finished.
            os.makedirs(self.eval_args.save_dir, exist_ok=False)
            with open(os.path.join(self.eval_args.save_dir, "results.json"), "w", encoding="utf-8", newline="\n") as f:
                json.dump(results, f, indent=2)

            with open(os.path.join(self.eval_args.save_dir, "results.log"), "w", encoding="utf-8", newline="\n") as f:
                f.write(score_info)
|
114 |
+
|
115 |
+
|
116 |
+
# Script entry point: build an Evaluator without explicit arguments and run the benchmark.
if __name__ == "__main__":
    evaluator = Evaluator()
    evaluator.eval()
|
LLM-Detector-V7-11w/src/llmtuner/eval/template.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import TYPE_CHECKING, Dict, List, Tuple
|
3 |
+
|
4 |
+
from llmtuner.extras.constants import CHOICES
|
5 |
+
|
6 |
+
if TYPE_CHECKING:
|
7 |
+
from datasets import Dataset
|
8 |
+
|
9 |
+
|
10 |
+
@dataclass
class EvalTemplate:
    r"""
    Text layout used to turn a multiple-choice record into a prompt / answer pair.
    """

    # Header text; formatted with ``subject``.
    system: str
    # Layout of one answer option; formatted with ``choice`` and ``content``.
    choice: str
    # Text appended after the options to introduce the answer.
    answer: str
    # Text placed in front of an answer letter by callers that encode the choices.
    prefix: str

    def parse_example(
        self,
        example: Dict[str, str]
    ) -> Tuple[str, str]:
        r"""
        Returns (question followed by its options and the answer cue, reference answer).

        Only the letters of CHOICES that are keys of ``example`` are rendered as options.
        """
        options = []
        for ch in CHOICES:
            if ch in example:
                options.append(self.choice.format(choice=ch, content=example[ch]))

        prompt_text = "".join([example["question"], *options, self.answer])
        return prompt_text, example["answer"]

    def format_example(
        self,
        target_data: Dict[str, str],
        support_set: "Dataset",
        subject_name: str,
        use_history: bool
    ) -> Tuple[str, str, List[Tuple[str, str]]]:
        r"""
        Builds the final query, the reference answer and the few-shot history.

        The system header is prepended to the first support example, or to the query itself
        when there is no support example. Without history support all examples are merged
        into one query, separated by blank lines, and the returned history is empty.
        """
        query, resp = self.parse_example(target_data)

        history = []
        for k in range(len(support_set)):
            history.append(self.parse_example(support_set[k]))

        header = self.system.format(subject=subject_name)
        if history:
            first_query, first_resp = history[0][0], history[0][1]
            history[0] = (header + first_query, first_resp)
        else:
            query = header + query

        if use_history:
            return query.strip(), resp, history

        merged = "\n\n".join(["".join(pair) for pair in history] + [query])
        return merged.strip(), resp, []
|
45 |
+
|
46 |
+
|
47 |
+
# Module-level registry: language key -> EvalTemplate. Filled by register_eval_template()
# and read by get_eval_template().
eval_templates: Dict[str, EvalTemplate] = {}
|
48 |
+
|
49 |
+
|
50 |
+
def register_eval_template(
    name: str,
    system: str,
    choice: str,
    answer: str,
    prefix: str
) -> None:
    r"""
    Creates an EvalTemplate from the given texts and stores it under ``name``.

    An existing entry with the same name is overwritten.
    """
    fields = dict(system=system, choice=choice, answer=answer, prefix=prefix)
    eval_templates[name] = EvalTemplate(**fields)
|
63 |
+
|
64 |
+
|
65 |
+
def get_eval_template(name: str) -> EvalTemplate:
    r"""
    Returns the evaluation template registered under ``name``.

    Raises:
        AssertionError: if no template with that name was registered.
    """
    found = eval_templates.get(name)
    assert found is not None, "Template {} does not exist.".format(name)
    return found
|
69 |
+
|
70 |
+
|
71 |
+
# English layout: header naming the subject, options as "\nA. text", answer cue "\nAnswer: ",
# and a single space as the prefix placed before an answer letter.
register_eval_template(
    name="en",
    system="The following are multiple choice questions (with answers) about {subject}.\n\n",
    choice="\n{choice}. {content}",
    answer="\nAnswer: ",
    prefix=" "
)
|
78 |
+
|
79 |
+
|
80 |
+
# Chinese layout: header announcing single-choice exam questions about the subject, same
# option format as "en", a Chinese answer cue, and a newline as the prefix before an answer letter.
register_eval_template(
    name="zh",
    system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
    choice="\n{choice}. {content}",
    answer="\n答案:",
    prefix="\n"
)
|
LLM-Detector-V7-11w/src/llmtuner/extras/__init__.py
ADDED
File without changes
|
LLM-Detector-V7-11w/src/llmtuner/extras/callbacks.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import time
|
4 |
+
from typing import TYPE_CHECKING
|
5 |
+
from datetime import timedelta
|
6 |
+
from transformers import TrainerCallback
|
7 |
+
from transformers.trainer_utils import has_length, PREFIX_CHECKPOINT_DIR
|
8 |
+
|
9 |
+
from llmtuner.extras.constants import LOG_FILE_NAME
|
10 |
+
from llmtuner.extras.logging import get_logger
|
11 |
+
from llmtuner.extras.misc import fix_valuehead_checkpoint
|
12 |
+
|
13 |
+
|
14 |
+
if TYPE_CHECKING:
|
15 |
+
from transformers import TrainingArguments, TrainerState, TrainerControl
|
16 |
+
|
17 |
+
|
18 |
+
# Module-level logger, created through the project's get_logger() helper.
logger = get_logger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
class FixValueHeadModelCallback(TrainerCallback):
    r"""
    Trainer callback that post-processes each saved checkpoint with fix_valuehead_checkpoint().
    """

    def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
        r"""
        Event called after a checkpoint save.
        """
        if not args.should_save:
            return

        model = kwargs.pop("model")
        # Directory name: checkpoint prefix followed by the current global step.
        checkpoint_dir = os.path.join(
            args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)
        )
        fix_valuehead_checkpoint(
            model=model,
            output_dir=checkpoint_dir,
            safe_serialization=args.save_safetensors
        )
|
33 |
+
|
34 |
+
|
35 |
+
class LogCallback(TrainerCallback):
|
36 |
+
|
37 |
+
def __init__(self, runner=None):
    r"""
    Stores the optional runner and resets the progress counters and timers.
    """
    self.runner = runner
    self.in_training = False
    self.start_time = time.time()
    # Step counters start at zero, the formatted durations start empty.
    self.cur_steps = self.max_steps = 0
    self.elapsed_time = self.remaining_time = ""
|
45 |
+
|
46 |
+
def timing(self):
    r"""
    Refreshes ``elapsed_time`` and ``remaining_time`` (both formatted as H:MM:SS strings).

    The remaining time is the average time per finished step multiplied by the number of
    steps left; it is zero while no step has finished.
    """
    elapsed = time.time() - self.start_time
    if self.cur_steps != 0:
        seconds_per_step = elapsed / self.cur_steps
    else:
        seconds_per_step = 0
    remaining = (self.max_steps - self.cur_steps) * seconds_per_step
    self.elapsed_time = str(timedelta(seconds=int(elapsed)))
    self.remaining_time = str(timedelta(seconds=int(remaining)))
|
53 |
+
|
54 |
+
def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called at the beginning of training.
    """
    if not state.is_local_process_zero:
        return

    self.in_training = True
    self.start_time = time.time()
    self.max_steps = state.max_steps

    # An old log file is only removed when overwriting the output directory was requested.
    previous_log = os.path.join(args.output_dir, LOG_FILE_NAME)
    if os.path.exists(previous_log) and args.overwrite_output_dir:
        logger.warning("Previous log file in this folder will be deleted.")
        os.remove(os.path.join(args.output_dir, LOG_FILE_NAME))
|
65 |
+
|
66 |
+
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called at the end of training.
    """
    if not state.is_local_process_zero:
        return

    # Leave training mode and clear the progress counters.
    self.in_training = False
    self.cur_steps = self.max_steps = 0
|
74 |
+
|
75 |
+
def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called at the end of an substep during gradient accumulation.
    """
    if not state.is_local_process_zero:
        return

    if self.runner is None:
        return

    # Ask the trainer to stop as soon as the attached runner reports an abort.
    if self.runner.aborted:
        control.should_epoch_stop = True
        control.should_training_stop = True
|
82 |
+
|
83 |
+
def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called at the end of a training step.
    """
    if not state.is_local_process_zero:
        return

    # Record the progress first, then refresh the time estimates based on it.
    self.cur_steps = state.global_step
    self.timing()

    abort_requested = self.runner is not None and self.runner.aborted
    if abort_requested:
        control.should_epoch_stop = True
        control.should_training_stop = True
|
93 |
+
|
94 |
+
def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called after an evaluation phase.
    """
    if self.in_training or not state.is_local_process_zero:
        return

    # Outside of training the progress counters are cleared after an evaluation.
    self.cur_steps = self.max_steps = 0
|
101 |
+
|
102 |
+
def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", *other, **kwargs):
    r"""
    Event called after a successful prediction.

    Clears both step counters on the local main process, but only when the
    prediction did not run as part of training. Extra positional arguments
    are accepted and ignored.
    """
    if self.in_training or not state.is_local_process_zero:
        return

    self.cur_steps, self.max_steps = 0, 0
|
109 |
+
|
110 |
+
def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs) -> None:
    r"""
    Event called after logging the last logs.

    On the local main process, builds a progress record from the callback's step
    counters, its elapsed/remaining time and the newest entry of ``state.log_history``,
    then appends it as one JSON line to ``trainer_log.jsonl`` inside ``args.output_dir``
    (the directory is created if missing). Metrics absent from the newest entry are
    written as ``null``. When a runner is attached, a short summary is also sent to
    the module logger.
    """
    if not state.is_local_process_zero:
        return

    # Fetch the newest entry once; an empty history yields an empty record instead of an IndexError.
    last_log = state.log_history[-1] if state.log_history else {}

    # With no known total (max_steps == 0) the progress is reported as complete.
    if self.max_steps != 0:
        percentage = round(self.cur_steps / self.max_steps * 100, 2)
    else:
        percentage = 100

    logs = dict(
        current_steps=self.cur_steps,
        total_steps=self.max_steps,
        loss=last_log.get("loss", None),
        eval_loss=last_log.get("eval_loss", None),
        predict_loss=last_log.get("predict_loss", None),
        reward=last_log.get("reward", None),
        learning_rate=last_log.get("learning_rate", None),
        epoch=last_log.get("epoch", None),
        percentage=percentage,
        elapsed_time=self.elapsed_time,
        remaining_time=self.remaining_time
    )
    if self.runner is not None:
        # Missing values are shown as 0 so the numeric format specifiers never receive None.
        logger.info("{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format(
            logs["loss"] or 0, logs["learning_rate"] or 0, logs["epoch"] or 0
        ))

    os.makedirs(args.output_dir, exist_ok=True)
    # NOTE: this is the file on_train_begin deletes via LOG_FILE_NAME; keep the literal in sync with that constant.
    with open(os.path.join(args.output_dir, "trainer_log.jsonl"), "a", encoding="utf-8") as f:
        f.write(json.dumps(logs) + "\n")
|
138 |
+
|
139 |
+
def on_prediction_step(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
    r"""
    Event called after a prediction step.

    Outside of training, and only on the local main process, counts the finished
    step and refreshes the timing information. The total number of steps is taken
    from the length of the ``eval_dataloader`` keyword argument the first time it
    is needed; dataloaders without a length are ignored.
    """
    eval_dataloader = kwargs.pop("eval_dataloader", None)
    if not state.is_local_process_zero:
        return
    if not has_length(eval_dataloader) or self.in_training:
        return

    if self.max_steps == 0:
        self.max_steps = len(eval_dataloader)

    self.cur_steps += 1
    self.timing()
|
LLM-Detector-V7-11w/src/llmtuner/extras/constants.py
ADDED
@@ -0,0 +1,707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum
|
2 |
+
from collections import defaultdict, OrderedDict
|
3 |
+
from typing import Dict, Optional
|
4 |
+
|
5 |
+
|
6 |
+
# Answer labels; presumably the options of multiple-choice evaluation (confirm against llmtuner.eval).
CHOICES = ["A", "B", "C", "D"]

# Per-prefix defaults filled in by register_model_group(); unknown prefixes yield "".
DEFAULT_MODULE = defaultdict(str)

DEFAULT_TEMPLATE = defaultdict(str)

# File extension -> type name for data files.
FILEEXT2TYPE = {
    "arrow": "arrow",
    "csv": "csv",
    "json": "json",
    "jsonl": "json",
    "parquet": "parquet",
    "txt": "text",
}

IGNORE_INDEX = -100

LAYERNORM_NAMES = {"norm", "ln"}

# Name of the JSON-lines progress log kept in the training output directory.
LOG_FILE_NAME = "trainer_log.jsonl"

METHODS = ["full", "freeze", "lora"]

PEFT_METHODS = ["lora"]

SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"]

# Model name -> {download source: path}; populated by register_model_group() in registration order.
SUPPORTED_MODELS = OrderedDict()

# Display name -> short stage identifier.
TRAINING_STAGES = {
    "Supervised Fine-Tuning": "sft",
    "Reward Modeling": "rm",
    "PPO": "ppo",
    "DPO": "dpo",
    "Pre-Training": "pt",
}

# File names of the value-head weights (pickle and safetensors variants).
V_HEAD_WEIGHTS_NAME = "value_head.bin"

V_HEAD_SAFE_WEIGHTS_NAME = "value_head.safetensors"
|
46 |
+
|
47 |
+
class DownloadSource(str, Enum):
    r"""
    Hub a registered model path belongs to.

    Members are also plain strings, so they compare equal to their short codes
    ("hf" for the default hub, "ms" for ModelScope).
    """

    DEFAULT = "hf"
    MODELSCOPE = "ms"
|
50 |
+
|
51 |
+
|
52 |
+
def register_model_group(
    models: Dict[str, Dict[DownloadSource, str]],
    module: Optional[str] = None,
    template: Optional[str] = None
) -> None:
    r"""
    Registers a family of models that share one name prefix.

    Every entry of ``models`` is stored in ``SUPPORTED_MODELS``. The prefix is the part
    of the model name before the first "-"; all names in the group must share it.
    When given, ``module`` and ``template`` become the defaults for that prefix in
    ``DEFAULT_MODULE`` and ``DEFAULT_TEMPLATE``.
    """
    prefix = None
    for name, path in models.items():
        current_prefix = name.split("-")[0]
        if prefix is None:
            # The first model of the group fixes the prefix.
            prefix = current_prefix
        else:
            assert prefix == current_prefix, "prefix should be identical."

        SUPPORTED_MODELS[name] = path

    if module is not None:
        DEFAULT_MODULE[prefix] = module

    if template is not None:
        DEFAULT_TEMPLATE[prefix] = template
|
68 |
+
|
69 |
+
|
70 |
+
register_model_group(
|
71 |
+
models={
|
72 |
+
"Baichuan-7B-Base": {
|
73 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-7B",
|
74 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/baichuan-7B"
|
75 |
+
},
|
76 |
+
"Baichuan-13B-Base": {
|
77 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Base",
|
78 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Base"
|
79 |
+
},
|
80 |
+
"Baichuan-13B-Chat": {
|
81 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Chat",
|
82 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat"
|
83 |
+
}
|
84 |
+
},
|
85 |
+
module="W_pack",
|
86 |
+
template="baichuan"
|
87 |
+
)
|
88 |
+
|
89 |
+
|
90 |
+
register_model_group(
|
91 |
+
models={
|
92 |
+
"Baichuan2-7B-Base": {
|
93 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Base",
|
94 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Base"
|
95 |
+
},
|
96 |
+
"Baichuan2-13B-Base": {
|
97 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base",
|
98 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base"
|
99 |
+
},
|
100 |
+
"Baichuan2-7B-Chat": {
|
101 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat",
|
102 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat"
|
103 |
+
},
|
104 |
+
"Baichuan2-13B-Chat": {
|
105 |
+
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat",
|
106 |
+
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat"
|
107 |
+
}
|
108 |
+
},
|
109 |
+
module="W_pack",
|
110 |
+
template="baichuan2"
|
111 |
+
)
|
112 |
+
|
113 |
+
|
114 |
+
register_model_group(
|
115 |
+
models={
|
116 |
+
"BLOOM-560M": {
|
117 |
+
DownloadSource.DEFAULT: "bigscience/bloom-560m",
|
118 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-560m"
|
119 |
+
},
|
120 |
+
"BLOOM-3B": {
|
121 |
+
DownloadSource.DEFAULT: "bigscience/bloom-3b",
|
122 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-3b"
|
123 |
+
},
|
124 |
+
"BLOOM-7B1": {
|
125 |
+
DownloadSource.DEFAULT: "bigscience/bloom-7b1",
|
126 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1"
|
127 |
+
}
|
128 |
+
},
|
129 |
+
module="query_key_value"
|
130 |
+
)
|
131 |
+
|
132 |
+
|
133 |
+
register_model_group(
|
134 |
+
models={
|
135 |
+
"BLOOMZ-560M": {
|
136 |
+
DownloadSource.DEFAULT: "bigscience/bloomz-560m",
|
137 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-560m"
|
138 |
+
},
|
139 |
+
"BLOOMZ-3B": {
|
140 |
+
DownloadSource.DEFAULT: "bigscience/bloomz-3b",
|
141 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-3b"
|
142 |
+
},
|
143 |
+
"BLOOMZ-7B1-mt": {
|
144 |
+
DownloadSource.DEFAULT: "bigscience/bloomz-7b1-mt",
|
145 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt"
|
146 |
+
}
|
147 |
+
},
|
148 |
+
module="query_key_value"
|
149 |
+
)
|
150 |
+
|
151 |
+
|
152 |
+
register_model_group(
|
153 |
+
models={
|
154 |
+
"BlueLM-7B-Base": {
|
155 |
+
DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Base",
|
156 |
+
DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Base"
|
157 |
+
},
|
158 |
+
"BlueLM-7B-Chat": {
|
159 |
+
DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Chat",
|
160 |
+
DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Chat"
|
161 |
+
}
|
162 |
+
},
|
163 |
+
template="bluelm"
|
164 |
+
)
|
165 |
+
|
166 |
+
|
167 |
+
register_model_group(
|
168 |
+
models={
|
169 |
+
"ChatGLM2-6B-Chat": {
|
170 |
+
DownloadSource.DEFAULT: "THUDM/chatglm2-6b",
|
171 |
+
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b"
|
172 |
+
}
|
173 |
+
},
|
174 |
+
module="query_key_value",
|
175 |
+
template="chatglm2"
|
176 |
+
)
|
177 |
+
|
178 |
+
|
179 |
+
register_model_group(
|
180 |
+
models={
|
181 |
+
"ChatGLM3-6B-Base": {
|
182 |
+
DownloadSource.DEFAULT: "THUDM/chatglm3-6b-base",
|
183 |
+
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b-base"
|
184 |
+
},
|
185 |
+
"ChatGLM3-6B-Chat": {
|
186 |
+
DownloadSource.DEFAULT: "THUDM/chatglm3-6b",
|
187 |
+
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b"
|
188 |
+
}
|
189 |
+
},
|
190 |
+
module="query_key_value",
|
191 |
+
template="chatglm3"
|
192 |
+
)
|
193 |
+
|
194 |
+
|
195 |
+
register_model_group(
|
196 |
+
models={
|
197 |
+
"ChineseLLaMA2-1.3B": {
|
198 |
+
DownloadSource.DEFAULT: "hfl/chinese-llama-2-1.3b",
|
199 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-1.3b"
|
200 |
+
},
|
201 |
+
"ChineseLLaMA2-7B": {
|
202 |
+
DownloadSource.DEFAULT: "hfl/chinese-llama-2-7b",
|
203 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-7b"
|
204 |
+
},
|
205 |
+
"ChineseLLaMA2-13B": {
|
206 |
+
DownloadSource.DEFAULT: "hfl/chinese-llama-2-13b",
|
207 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-13b"
|
208 |
+
},
|
209 |
+
"ChineseLLaMA2-1.3B-Chat": {
|
210 |
+
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-1.3b",
|
211 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-1.3b"
|
212 |
+
},
|
213 |
+
"ChineseLLaMA2-7B-Chat": {
|
214 |
+
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-7b",
|
215 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-7b"
|
216 |
+
},
|
217 |
+
"ChineseLLaMA2-13B-Chat": {
|
218 |
+
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-13b",
|
219 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-13b"
|
220 |
+
}
|
221 |
+
},
|
222 |
+
template="llama2_zh"
|
223 |
+
)
|
224 |
+
|
225 |
+
|
226 |
+
register_model_group(
|
227 |
+
models={
|
228 |
+
"DeepseekLLM-7B-Base": {
|
229 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-base",
|
230 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base"
|
231 |
+
},
|
232 |
+
"DeepseekLLM-67B-Base": {
|
233 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-base",
|
234 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base"
|
235 |
+
},
|
236 |
+
"DeepseekLLM-7B-Chat": {
|
237 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-chat",
|
238 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat"
|
239 |
+
},
|
240 |
+
"DeepseekLLM-67B-Chat": {
|
241 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat",
|
242 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat"
|
243 |
+
}
|
244 |
+
},
|
245 |
+
template="deepseek"
|
246 |
+
)
|
247 |
+
|
248 |
+
|
249 |
+
register_model_group(
|
250 |
+
models={
|
251 |
+
"DeepseekCoder-6.7B-Base": {
|
252 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base",
|
253 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base"
|
254 |
+
},
|
255 |
+
"DeepseekCoder-33B-Base": {
|
256 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
|
257 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base"
|
258 |
+
},
|
259 |
+
"DeepseekCoder-6.7B-Chat": {
|
260 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct",
|
261 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct"
|
262 |
+
},
|
263 |
+
"DeepseekCoder-33B-Chat": {
|
264 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
|
265 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct"
|
266 |
+
}
|
267 |
+
},
|
268 |
+
template="deepseekcoder"
|
269 |
+
)
|
270 |
+
|
271 |
+
|
272 |
+
register_model_group(
|
273 |
+
models={
|
274 |
+
"DeepseekMoE-16B-Base": {
|
275 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base",
|
276 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base"
|
277 |
+
},
|
278 |
+
"DeepseekMoE-16B-Chat": {
|
279 |
+
DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat",
|
280 |
+
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat"
|
281 |
+
}
|
282 |
+
},
|
283 |
+
template="deepseek"
|
284 |
+
)
|
285 |
+
|
286 |
+
|
287 |
+
register_model_group(
|
288 |
+
models={
|
289 |
+
"Falcon-7B": {
|
290 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-7b",
|
291 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b"
|
292 |
+
},
|
293 |
+
"Falcon-40B": {
|
294 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-40b",
|
295 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b"
|
296 |
+
},
|
297 |
+
"Falcon-180B": {
|
298 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-180b",
|
299 |
+
DownloadSource.MODELSCOPE: "modelscope/falcon-180B"
|
300 |
+
},
|
301 |
+
"Falcon-7B-Chat": {
|
302 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-7b-instruct",
|
303 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b-instruct"
|
304 |
+
},
|
305 |
+
"Falcon-40B-Chat": {
|
306 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-40b-instruct",
|
307 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b-instruct"
|
308 |
+
},
|
309 |
+
"Falcon-180B-Chat": {
|
310 |
+
DownloadSource.DEFAULT: "tiiuae/falcon-180b-chat",
|
311 |
+
DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat"
|
312 |
+
}
|
313 |
+
},
|
314 |
+
module="query_key_value",
|
315 |
+
template="falcon"
|
316 |
+
)
|
317 |
+
|
318 |
+
|
319 |
+
register_model_group(
|
320 |
+
models={
|
321 |
+
"InternLM-7B": {
|
322 |
+
DownloadSource.DEFAULT: "internlm/internlm-7b",
|
323 |
+
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-7b"
|
324 |
+
},
|
325 |
+
"InternLM-20B": {
|
326 |
+
DownloadSource.DEFAULT: "internlm/internlm-20b",
|
327 |
+
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-20b"
|
328 |
+
},
|
329 |
+
"InternLM-7B-Chat": {
|
330 |
+
DownloadSource.DEFAULT: "internlm/internlm-chat-7b",
|
331 |
+
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-7b"
|
332 |
+
},
|
333 |
+
"InternLM-20B-Chat": {
|
334 |
+
DownloadSource.DEFAULT: "internlm/internlm-chat-20b",
|
335 |
+
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-20b"
|
336 |
+
}
|
337 |
+
},
|
338 |
+
template="intern"
|
339 |
+
)
|
340 |
+
|
341 |
+
|
342 |
+
register_model_group(
|
343 |
+
models={
|
344 |
+
"LingoWhale-8B": {
|
345 |
+
DownloadSource.DEFAULT: "deeplang-ai/LingoWhale-8B",
|
346 |
+
DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B"
|
347 |
+
}
|
348 |
+
},
|
349 |
+
module="qkv_proj"
|
350 |
+
)
|
351 |
+
|
352 |
+
|
353 |
+
register_model_group(
|
354 |
+
models={
|
355 |
+
"LLaMA-7B": {
|
356 |
+
DownloadSource.DEFAULT: "huggyllama/llama-7b",
|
357 |
+
DownloadSource.MODELSCOPE: "skyline2006/llama-7b"
|
358 |
+
},
|
359 |
+
"LLaMA-13B": {
|
360 |
+
DownloadSource.DEFAULT: "huggyllama/llama-13b",
|
361 |
+
DownloadSource.MODELSCOPE: "skyline2006/llama-13b"
|
362 |
+
},
|
363 |
+
"LLaMA-30B": {
|
364 |
+
DownloadSource.DEFAULT: "huggyllama/llama-30b",
|
365 |
+
DownloadSource.MODELSCOPE: "skyline2006/llama-30b"
|
366 |
+
},
|
367 |
+
"LLaMA-65B": {
|
368 |
+
DownloadSource.DEFAULT: "huggyllama/llama-65b",
|
369 |
+
DownloadSource.MODELSCOPE: "skyline2006/llama-65b"
|
370 |
+
}
|
371 |
+
}
|
372 |
+
)
|
373 |
+
|
374 |
+
|
375 |
+
register_model_group(
|
376 |
+
models={
|
377 |
+
"LLaMA2-7B": {
|
378 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-hf",
|
379 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-ms"
|
380 |
+
},
|
381 |
+
"LLaMA2-13B": {
|
382 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-hf",
|
383 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-ms"
|
384 |
+
},
|
385 |
+
"LLaMA2-70B": {
|
386 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-hf",
|
387 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-ms"
|
388 |
+
},
|
389 |
+
"LLaMA2-7B-Chat": {
|
390 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-chat-hf",
|
391 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-chat-ms"
|
392 |
+
},
|
393 |
+
"LLaMA2-13B-Chat": {
|
394 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-chat-hf",
|
395 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-chat-ms"
|
396 |
+
},
|
397 |
+
"LLaMA2-70B-Chat": {
|
398 |
+
DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-chat-hf",
|
399 |
+
DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-chat-ms"
|
400 |
+
}
|
401 |
+
},
|
402 |
+
template="llama2"
|
403 |
+
)
|
404 |
+
|
405 |
+
|
406 |
+
register_model_group(
|
407 |
+
models={
|
408 |
+
"Mistral-7B": {
|
409 |
+
DownloadSource.DEFAULT: "mistralai/Mistral-7B-v0.1",
|
410 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-v0.1"
|
411 |
+
},
|
412 |
+
"Mistral-7B-Chat": {
|
413 |
+
DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.1",
|
414 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.1"
|
415 |
+
},
|
416 |
+
"Mistral-7B-v0.2-Chat": {
|
417 |
+
DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.2",
|
418 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.2"
|
419 |
+
}
|
420 |
+
},
|
421 |
+
template="mistral"
|
422 |
+
)
|
423 |
+
|
424 |
+
|
425 |
+
register_model_group(
|
426 |
+
models={
|
427 |
+
"Mixtral-8x7B": {
|
428 |
+
DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-v0.1",
|
429 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1"
|
430 |
+
},
|
431 |
+
"Mixtral-8x7B-Chat": {
|
432 |
+
DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
433 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-Instruct-v0.1"
|
434 |
+
}
|
435 |
+
},
|
436 |
+
template="mistral"
|
437 |
+
)
|
438 |
+
|
439 |
+
|
440 |
+
register_model_group(
|
441 |
+
models={
|
442 |
+
"OpenChat3.5-7B-Chat": {
|
443 |
+
DownloadSource.DEFAULT: "openchat/openchat_3.5",
|
444 |
+
DownloadSource.MODELSCOPE: "myxiongmodel/openchat_3.5"
|
445 |
+
}
|
446 |
+
},
|
447 |
+
template="openchat"
|
448 |
+
)
|
449 |
+
|
450 |
+
|
451 |
+
register_model_group(
|
452 |
+
models={
|
453 |
+
"Phi-1.5-1.3B": {
|
454 |
+
DownloadSource.DEFAULT: "microsoft/phi-1_5",
|
455 |
+
DownloadSource.MODELSCOPE: "allspace/PHI_1-5"
|
456 |
+
},
|
457 |
+
"Phi-2-2.7B": {
|
458 |
+
DownloadSource.DEFAULT: "microsoft/phi-2",
|
459 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/phi-2"
|
460 |
+
}
|
461 |
+
}
|
462 |
+
)
|
463 |
+
|
464 |
+
|
465 |
+
register_model_group(
|
466 |
+
models={
|
467 |
+
"Qwen-1.8B": {
|
468 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B",
|
469 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B"
|
470 |
+
},
|
471 |
+
"Qwen-7B": {
|
472 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-7B",
|
473 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-7B"
|
474 |
+
},
|
475 |
+
"Qwen-14B": {
|
476 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-14B",
|
477 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-14B"
|
478 |
+
},
|
479 |
+
"Qwen-72B": {
|
480 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-72B",
|
481 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-72B"
|
482 |
+
},
|
483 |
+
"Qwen-1.8B-Chat": {
|
484 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat",
|
485 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat"
|
486 |
+
},
|
487 |
+
"Qwen-7B-Chat": {
|
488 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat",
|
489 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat"
|
490 |
+
},
|
491 |
+
"Qwen-14B-Chat": {
|
492 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat",
|
493 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat"
|
494 |
+
},
|
495 |
+
"Qwen-72B-Chat": {
|
496 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat",
|
497 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat"
|
498 |
+
},
|
499 |
+
"Qwen-1.8B-int8-Chat": {
|
500 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int8",
|
501 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int8"
|
502 |
+
},
|
503 |
+
"Qwen-1.8B-int4-Chat": {
|
504 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int4",
|
505 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int4"
|
506 |
+
},
|
507 |
+
"Qwen-7B-int8-Chat": {
|
508 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int8",
|
509 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int8"
|
510 |
+
},
|
511 |
+
"Qwen-7B-int4-Chat": {
|
512 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int4",
|
513 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int4"
|
514 |
+
},
|
515 |
+
"Qwen-14B-int8-Chat": {
|
516 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int8",
|
517 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int8"
|
518 |
+
},
|
519 |
+
"Qwen-14B-int4-Chat": {
|
520 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int4",
|
521 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int4"
|
522 |
+
},
|
523 |
+
"Qwen-72B-int8-Chat": {
|
524 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int8",
|
525 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int8"
|
526 |
+
},
|
527 |
+
"Qwen-72B-int4-Chat": {
|
528 |
+
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int4",
|
529 |
+
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4"
|
530 |
+
}
|
531 |
+
},
|
532 |
+
module="c_attn",
|
533 |
+
template="qwen"
|
534 |
+
)
|
535 |
+
|
536 |
+
|
537 |
+
register_model_group(
|
538 |
+
models={
|
539 |
+
"SOLAR-10.7B": {
|
540 |
+
DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-v1.0"
|
541 |
+
},
|
542 |
+
"SOLAR-10.7B-Chat": {
|
543 |
+
DownloadSource.DEFAULT: "upstage/SOLAR-10.7B-Instruct-v1.0",
|
544 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/SOLAR-10.7B-Instruct-v1.0"
|
545 |
+
}
|
546 |
+
},
|
547 |
+
template="solar"
|
548 |
+
)
|
549 |
+
|
550 |
+
|
551 |
+
register_model_group(
|
552 |
+
models={
|
553 |
+
"Skywork-13B-Base": {
|
554 |
+
DownloadSource.DEFAULT: "Skywork/Skywork-13B-base",
|
555 |
+
DownloadSource.MODELSCOPE: "skywork/Skywork-13B-base"
|
556 |
+
}
|
557 |
+
}
|
558 |
+
)
|
559 |
+
|
560 |
+
|
561 |
+
register_model_group(
|
562 |
+
models={
|
563 |
+
"Vicuna1.5-7B-Chat": {
|
564 |
+
DownloadSource.DEFAULT: "lmsys/vicuna-7b-v1.5",
|
565 |
+
DownloadSource.MODELSCOPE: "Xorbits/vicuna-7b-v1.5"
|
566 |
+
},
|
567 |
+
"Vicuna1.5-13B-Chat": {
|
568 |
+
DownloadSource.DEFAULT: "lmsys/vicuna-13b-v1.5",
|
569 |
+
DownloadSource.MODELSCOPE: "Xorbits/vicuna-13b-v1.5"
|
570 |
+
}
|
571 |
+
},
|
572 |
+
template="vicuna"
|
573 |
+
)
|
574 |
+
|
575 |
+
|
576 |
+
register_model_group(
|
577 |
+
models={
|
578 |
+
"XuanYuan-70B": {
|
579 |
+
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B"
|
580 |
+
},
|
581 |
+
"XuanYuan-70B-Chat": {
|
582 |
+
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat"
|
583 |
+
},
|
584 |
+
"XuanYuan-70B-int8-Chat": {
|
585 |
+
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit"
|
586 |
+
},
|
587 |
+
"XuanYuan-70B-int4-Chat": {
|
588 |
+
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit"
|
589 |
+
}
|
590 |
+
},
|
591 |
+
template="xuanyuan"
|
592 |
+
)
|
593 |
+
|
594 |
+
|
595 |
+
register_model_group(
|
596 |
+
models={
|
597 |
+
"XVERSE-7B": {
|
598 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-7B",
|
599 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-7B"
|
600 |
+
},
|
601 |
+
"XVERSE-13B": {
|
602 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-13B",
|
603 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-13B"
|
604 |
+
},
|
605 |
+
"XVERSE-65B": {
|
606 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-65B",
|
607 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-65B"
|
608 |
+
},
|
609 |
+
"XVERSE-65B-2": {
|
610 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-65B-2",
|
611 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-2"
|
612 |
+
},
|
613 |
+
"XVERSE-7B-Chat": {
|
614 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-7B-Chat",
|
615 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-7B-Chat"
|
616 |
+
},
|
617 |
+
"XVERSE-13B-Chat": {
|
618 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-13B-Chat",
|
619 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-13B-Chat"
|
620 |
+
},
|
621 |
+
"XVERSE-65B-Chat": {
|
622 |
+
DownloadSource.DEFAULT: "xverse/XVERSE-65B-Chat",
|
623 |
+
DownloadSource.MODELSCOPE: "xverse/XVERSE-65B-Chat"
|
624 |
+
}
|
625 |
+
},
|
626 |
+
template="xverse"
|
627 |
+
)
|
628 |
+
|
629 |
+
|
630 |
+
register_model_group(
|
631 |
+
models={
|
632 |
+
"Yayi-7B": {
|
633 |
+
DownloadSource.DEFAULT: "wenge-research/yayi-7b-llama2",
|
634 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-7b-llama2"
|
635 |
+
},
|
636 |
+
"Yayi-13B": {
|
637 |
+
DownloadSource.DEFAULT: "wenge-research/yayi-13b-llama2",
|
638 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-13b-llama2"
|
639 |
+
}
|
640 |
+
},
|
641 |
+
template="yayi"
|
642 |
+
)
|
643 |
+
|
644 |
+
|
645 |
+
register_model_group(
|
646 |
+
models={
|
647 |
+
"Yi-6B": {
|
648 |
+
DownloadSource.DEFAULT: "01-ai/Yi-6B",
|
649 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-6B"
|
650 |
+
},
|
651 |
+
"Yi-34B": {
|
652 |
+
DownloadSource.DEFAULT: "01-ai/Yi-34B",
|
653 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-34B"
|
654 |
+
},
|
655 |
+
"Yi-6B-Chat": {
|
656 |
+
DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat",
|
657 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat"
|
658 |
+
},
|
659 |
+
"Yi-34B-Chat": {
|
660 |
+
DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat",
|
661 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat"
|
662 |
+
},
|
663 |
+
"Yi-6B-int8-Chat": {
|
664 |
+
DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat-8bits",
|
665 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits"
|
666 |
+
},
|
667 |
+
"Yi-34B-int8-Chat": {
|
668 |
+
DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-8bits",
|
669 |
+
DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-8bits"
|
670 |
+
}
|
671 |
+
},
|
672 |
+
template="yi"
|
673 |
+
)
|
674 |
+
|
675 |
+
|
676 |
+
register_model_group(
|
677 |
+
models={
|
678 |
+
"Yuan2-2B-Chat": {
|
679 |
+
DownloadSource.DEFAULT: "IEITYuan/Yuan2-2B-hf",
|
680 |
+
DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-2B-hf"
|
681 |
+
},
|
682 |
+
"Yuan2-51B-Chat": {
|
683 |
+
DownloadSource.DEFAULT: "IEITYuan/Yuan2-51B-hf",
|
684 |
+
DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-51B-hf"
|
685 |
+
},
|
686 |
+
"Yuan2-102B-Chat": {
|
687 |
+
DownloadSource.DEFAULT: "IEITYuan/Yuan2-102B-hf",
|
688 |
+
DownloadSource.MODELSCOPE: "YuanLLM/Yuan2.0-102B-hf"
|
689 |
+
}
|
690 |
+
},
|
691 |
+
template="yuan"
|
692 |
+
)
|
693 |
+
|
694 |
+
|
695 |
+
register_model_group(
|
696 |
+
models={
|
697 |
+
"Zephyr-7B-Alpha-Chat": {
|
698 |
+
DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-alpha",
|
699 |
+
DownloadSource.MODELSCOPE: "AI-ModelScope/zephyr-7b-alpha"
|
700 |
+
},
|
701 |
+
"Zephyr-7B-Beta-Chat": {
|
702 |
+
DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-beta",
|
703 |
+
DownloadSource.MODELSCOPE: "modelscope/zephyr-7b-beta"
|
704 |
+
}
|
705 |
+
},
|
706 |
+
template="zephyr"
|
707 |
+
)
|
LLM-Detector-V7-11w/src/llmtuner/extras/logging.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import logging
|
3 |
+
|
4 |
+
|
5 |
+
class LoggerHandler(logging.Handler):
    r"""
    Logger handler used in Web UI.

    Collects formatted records in ``self.log``, each followed by an empty line.
    Records coming from the "httpx" logger are dropped.
    """

    def __init__(self):
        super().__init__()
        self.log = ""

    def reset(self):
        # Discard everything collected so far.
        self.log = ""

    def emit(self, record):
        if record.name != "httpx":
            self.log += self.format(record) + "\n\n"
|
23 |
+
|
24 |
+
|
25 |
+
def get_logger(name: str) -> logging.Logger:
    r"""
    Gets a standard logger with a stream handler to stdout.

    The logger level is set to INFO. The stdout handler is attached only once per
    logger, so calling this function repeatedly with the same name does not
    duplicate the output.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # logging.getLogger returns the same object for the same name; attaching a fresh handler
    # on every call would make each record print once per call.
    has_stdout_handler = any(
        isinstance(existing, logging.StreamHandler) and getattr(existing, "stream", None) is sys.stdout
        for existing in logger.handlers
    )
    if not has_stdout_handler:
        formatter = logging.Formatter(
            fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S"
        )
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
|
41 |
+
|
42 |
+
|
43 |
+
def reset_logging() -> None:
    r"""
    Removes basic config of root logger. (unused in script)

    Detaches every handler and every filter from the root logger.
    """
    root = logging.getLogger()
    # removeHandler/removeFilter delete from the live lists; iterating those lists directly
    # while removing skips every other entry, so walk over snapshots instead.
    for handler in list(root.handlers):
        root.removeHandler(handler)
    for log_filter in list(root.filters):
        root.removeFilter(log_filter)
|
LLM-Detector-V7-11w/src/llmtuner/extras/misc.py
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gc
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
from typing import TYPE_CHECKING, Dict, Tuple
|
5 |
+
from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTrainedModel
|
6 |
+
from transformers.utils import (
|
7 |
+
WEIGHTS_NAME,
|
8 |
+
SAFE_WEIGHTS_NAME,
|
9 |
+
is_torch_bf16_gpu_available,
|
10 |
+
is_torch_cuda_available,
|
11 |
+
is_torch_npu_available,
|
12 |
+
is_torch_xpu_available
|
13 |
+
)
|
14 |
+
from peft import PeftModel
|
15 |
+
|
16 |
+
from llmtuner.extras.constants import V_HEAD_WEIGHTS_NAME, V_HEAD_SAFE_WEIGHTS_NAME
|
17 |
+
from llmtuner.extras.logging import get_logger
|
18 |
+
|
19 |
+
|
20 |
+
# fp16 is treated as usable whenever an NPU or a CUDA device is reported as available.
_is_fp16_available = is_torch_npu_available() or is_torch_cuda_available()
try:
    _is_bf16_available = is_torch_bf16_gpu_available()
except Exception:
    # Best-effort probe: any error raised while querying the backend means "no bf16".
    # `Exception` (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
    _is_bf16_available = False
|
25 |
+
|
26 |
+
|
27 |
+
if TYPE_CHECKING:
|
28 |
+
from trl import AutoModelForCausalLMWithValueHead
|
29 |
+
from llmtuner.hparams import ModelArguments
|
30 |
+
|
31 |
+
|
32 |
+
logger = get_logger(__name__)
|
33 |
+
|
34 |
+
|
35 |
+
class AverageMeter:
    r"""
    Keeps the most recent value together with a running weighted mean.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        # Clear the latest value, the mean, the weighted total and the sample count.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # `val` becomes the latest value and enters the total with weight `n`.
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count
|
53 |
+
|
54 |
+
|
55 |
+
def count_parameters(model: torch.nn.Module) -> Tuple[int, int]:
    r"""
    Counts the parameters of a model.

    Returns a tuple `(trainable_params, all_params)`.
    """
    total, trainable = 0, 0
    for tensor in model.parameters():
        size = tensor.numel()

        # an empty tensor that carries `ds_numel` (DS Zero 3, weights initialized empty)
        if size == 0 and hasattr(tensor, "ds_numel"):
            size = tensor.ds_numel

        # bitsandbytes 4bit linear layers: the count is doubled by design of those layers
        if tensor.__class__.__name__ == "Params4bit":
            size *= 2

        total += size
        trainable += size if tensor.requires_grad else 0

    return trainable, total
|
75 |
+
|
76 |
+
|
77 |
+
def fix_valuehead_checkpoint(
    model: "AutoModelForCausalLMWithValueHead",
    output_dir: str,
    safe_serialization: bool
) -> None:
    r"""
    Rewrites a saved value-head checkpoint as separate decoder and value-head weight files.

    The model is already unwrapped.

    There are three cases:
    1. full tuning without ds_zero3: state_dict = {"model.layers.*": ..., "v_head.summary.*": ...}
    2. lora tuning without ds_zero3: state_dict = {"v_head.summary.*": ...}
    3. under deepspeed zero3: state_dict = {"pretrained_model.model.layers.*": ..., "v_head.summary.*": ...}

    We assume `stage3_gather_16bit_weights_on_model_save=true`.
    """
    backbone = model.pretrained_model
    if not isinstance(backbone, (PreTrainedModel, PeftModel)):
        return

    # Load the checkpoint that was just written into `output_dir`.
    if safe_serialization:
        from safetensors import safe_open
        from safetensors.torch import save_file
        path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME)
        with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f:
            state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()}
    else:
        path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME)
        state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu")

    # Entries prefixed with "v_head." go to the value head file; everything else is
    # decoder weight, stored with the "pretrained_model." marker stripped from its name.
    v_head_state_dict = {
        name: param for name, param in state_dict.items() if name.startswith("v_head.")
    }
    decoder_state_dict = {
        name.replace("pretrained_model.", ""): param
        for name, param in state_dict.items()
        if not name.startswith("v_head.")
    }

    # The combined file is deleted before the two separate files are written.
    os.remove(path_to_checkpoint)
    backbone.save_pretrained(
        output_dir,
        state_dict=decoder_state_dict or None,
        safe_serialization=safe_serialization
    )

    if safe_serialization:
        save_file(v_head_state_dict, os.path.join(output_dir, V_HEAD_SAFE_WEIGHTS_NAME), metadata={"format": "pt"})
    else:
        torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME))

    logger.info("Value head model saved at: {}".format(output_dir))
|
126 |
+
|
127 |
+
|
128 |
+
def get_current_device() -> torch.device:
    r"""
    Picks the device for the current process.

    Backends are probed in the order XPU, NPU, CUDA. The device index comes from the
    `LOCAL_RANK` environment variable (defaulting to "0"). Falls back to the CPU.
    """
    local_rank = os.environ.get("LOCAL_RANK", "0")
    backends = (
        ("xpu", is_torch_xpu_available),
        ("npu", is_torch_npu_available),
        ("cuda", is_torch_cuda_available),
    )
    for prefix, is_available in backends:
        if is_available():
            return torch.device("{}:{}".format(prefix, local_rank))

    return torch.device("cpu")
|
142 |
+
|
143 |
+
|
144 |
+
def get_device_count() -> int:
    r"""
    Returns the device count reported by `torch.cuda.device_count()`.
    """
    num_devices = torch.cuda.device_count()
    return num_devices
|
146 |
+
|
147 |
+
|
148 |
+
def get_logits_processor() -> "LogitsProcessorList":
    r"""
    Builds a processor list containing a single `InfNanRemoveLogitsProcessor`.
    """
    nan_inf_filter = InfNanRemoveLogitsProcessor()
    processors = LogitsProcessorList()
    processors.append(nan_inf_filter)
    return processors
|
155 |
+
|
156 |
+
|
157 |
+
def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype:
    r"""
    Chooses the compute dtype from the model dtype and the module-level availability flags.

    bfloat16 is returned only when the model itself is bfloat16 and bf16 is available;
    otherwise float16 when fp16 is available, and float32 as the last resort.
    """
    if model_dtype == torch.bfloat16 and _is_bf16_available:
        return torch.bfloat16

    if _is_fp16_available:
        return torch.float16

    return torch.float32
|
167 |
+
|
168 |
+
|
169 |
+
def torch_gc() -> None:
    r"""
    Runs the Python garbage collector, then, if CUDA is available, calls
    `torch.cuda.empty_cache()` and `torch.cuda.ipc_collect()`.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
|
177 |
+
|
178 |
+
|
179 |
+
def try_download_model_from_ms(model_args: "ModelArguments") -> None:
    r"""
    Fetches the model through `modelscope.snapshot_download` when ModelScope usage is enabled.

    Does nothing when `use_modelscope()` is false or when `model_args.model_name_or_path`
    already exists on disk. Otherwise `model_args.model_name_or_path` is overwritten in
    place with the value returned by `snapshot_download`.

    Raises:
        ImportError: if the `modelscope` package cannot be imported.
    """
    if not use_modelscope() or os.path.exists(model_args.model_name_or_path):
        return

    # Guard only the import, so that an ImportError raised while downloading is not
    # misreported as "modelscope is not installed".
    try:
        from modelscope import snapshot_download
    except ImportError as err:
        raise ImportError("Please install modelscope via `pip install modelscope -U`") from err

    # The revision "main" is mapped to "master"
    # (presumably ModelScope's default branch name -- TODO confirm).
    revision = "master" if model_args.model_revision == "main" else model_args.model_revision
    model_args.model_name_or_path = snapshot_download(
        model_args.model_name_or_path,
        revision=revision,
        cache_dir=model_args.cache_dir
    )
|
193 |
+
|
194 |
+
|
195 |
+
def use_modelscope() -> bool:
    r"""
    Tells whether the `USE_MODELSCOPE_HUB` environment variable holds a non-zero integer.
    """
    flag = os.environ.get("USE_MODELSCOPE_HUB", "0")
    return int(flag) != 0
|
LLM-Detector-V7-11w/src/llmtuner/extras/packages.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib.metadata
|
2 |
+
import importlib.util
|
3 |
+
|
4 |
+
|
5 |
+
def is_package_available(name: str) -> bool:
    r"""
    Returns True when an import spec can be found for the module `name`.
    """
    return importlib.util.find_spec(name) is not None


def get_package_version(name: str) -> str:
    r"""
    Returns the installed version of the distribution `name`, or "0.0.0" if it cannot be read.
    """
    try:
        return importlib.metadata.version(name)
    except Exception:
        # Best-effort lookup: a missing or unreadable distribution maps to the sentinel version.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return "0.0.0"


def is_fastapi_availble() -> bool:
    r"""
    Returns True when `fastapi` is importable.
    """
    return is_package_available("fastapi")


def is_flash_attn2_available() -> bool:
    r"""
    Returns True when `flash_attn` is importable and its version string starts with "2".
    """
    return is_package_available("flash_attn") and get_package_version("flash_attn").startswith("2")


def is_jieba_available() -> bool:
    r"""
    Returns True when `jieba` is importable.
    """
    return is_package_available("jieba")


def is_matplotlib_available() -> bool:
    r"""
    Returns True when `matplotlib` is importable.
    """
    return is_package_available("matplotlib")


def is_nltk_available() -> bool:
    r"""
    Returns True when `nltk` is importable.
    """
    return is_package_available("nltk")


def is_requests_available() -> bool:
    r"""
    Returns True when `requests` is importable.
    """
    return is_package_available("requests")


def is_rouge_available() -> bool:
    r"""
    Returns True when `rouge_chinese` is importable.
    """
    return is_package_available("rouge_chinese")


def is_starlette_available() -> bool:
    r"""
    Returns True when `sse_starlette` is importable.
    """
    return is_package_available("sse_starlette")


def is_uvicorn_available() -> bool:
    r"""
    Returns True when `uvicorn` is importable.
    """
    return is_package_available("uvicorn")
|
LLM-Detector-V7-11w/src/llmtuner/extras/patches/__init__.py
ADDED
File without changes
|
LLM-Detector-V7-11w/src/llmtuner/extras/patches/llama_patch.py
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
from typing import Optional, Tuple
|
5 |
+
from transformers.utils import logging
|
6 |
+
from transformers.models.llama.modeling_llama import LlamaAttention, apply_rotary_pos_emb
|
7 |
+
|
8 |
+
try:
|
9 |
+
from transformers.models.llama.modeling_llama import repeat_kv
|
10 |
+
except ImportError:
|
11 |
+
print("Please upgrade `transformers`.")
|
12 |
+
|
13 |
+
from llmtuner.extras.packages import is_flash_attn2_available
|
14 |
+
|
15 |
+
|
16 |
+
if is_flash_attn2_available():
|
17 |
+
from flash_attn import flash_attn_func, flash_attn_varlen_func # type: ignore
|
18 |
+
from flash_attn.bert_padding import pad_input, unpad_input # type: ignore
|
19 |
+
|
20 |
+
|
21 |
+
logger = logging.get_logger(__name__)
|
22 |
+
|
23 |
+
|
24 |
+
# Modified from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
|
25 |
+
class LlamaShiftShortAttention(LlamaAttention):
    r"""
    LLaMA attention with an optional grouped ("shift short") attention path for training.

    When `config.group_size_ratio` is set and the module is in training mode, the sequence
    is split into groups of `int(q_len * group_size_ratio)` tokens and attention is computed
    inside each group, with the second half of the heads rolled by half a group beforehand.
    In every other case the regular attention computation is used.
    """

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        **kwargs
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        r"""
        Computes self-attention over `hidden_states` of shape (bsz, q_len, hidden_size).

        Returns a tuple `(attn_output, attn_weights, past_key_value)`: `attn_weights` is None
        unless `output_attentions` is set and `past_key_value` is None unless `use_cache` is set.
        """
        bsz, q_len, _ = hidden_states.size()

        query_states = self.q_proj(hidden_states)
        key_states = self.k_proj(hidden_states)
        value_states = self.v_proj(hidden_states)

        # (bsz, q_len, n_heads, head_dim) -> (bsz, n_heads, q_len, head_dim)
        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

        # the rotary table has to cover the cached positions as well
        kv_seq_len = key_states.shape[-2]
        if past_key_value is not None:
            kv_seq_len += past_key_value[0].shape[-2]

        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)

        if past_key_value is not None: # reuse k, v, self_attention
            key_states = torch.cat([past_key_value[0], key_states], dim=2)
            value_states = torch.cat([past_key_value[1], value_states], dim=2)

        past_key_value = (key_states, value_states) if use_cache else None

        # default of None keeps this lookup from raising when the attribute is absent
        if getattr(self, "num_key_value_groups", None):
            key_states = repeat_kv(key_states, self.num_key_value_groups)
            value_states = repeat_kv(value_states, self.num_key_value_groups)

        if getattr(self.config, "group_size_ratio", None) and self.training: # shift
            groupsz = int(q_len * getattr(self.config, "group_size_ratio"))
            assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz)
            num_groups = q_len // groupsz
            def shift(state: torch.Tensor) -> torch.Tensor:
                state = state.transpose(1, 2) # output: (bsz, seq_len, n_heads, head_dim)
                # NOTE(review): `-groupsz//2` parses as `(-groupsz)//2`, which differs from
                # `-(groupsz//2)` for odd group sizes -- confirm group sizes are always even.
                state = torch.cat((
                    state[:, :, :self.num_heads//2], state[:, :, self.num_heads//2:].roll(-groupsz//2, dims=1)
                ), dim=2)
                return state.reshape(bsz * num_groups, groupsz, self.num_heads, self.head_dim).transpose(1, 2)

            query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states)
            if attention_mask is not None:
                attention_mask = attention_mask[:, :, :groupsz, :groupsz].repeat(num_groups, 1, 1, 1)

        attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)

        if attention_mask is not None:
            attn_weights = attn_weights + attention_mask

        # upcast attention to fp32
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
        attn_output = torch.matmul(attn_weights, value_states) # (bsz, :, seq_len, :) or (bsz*n_group, :, groupsz, :)
        attn_output = attn_output.transpose(1, 2).contiguous()

        if getattr(self.config, "group_size_ratio", None) and self.training: # shift back
            # `reshape` is not in-place: the result must be assigned, and the two head
            # halves must be re-joined along the head axis (dim=2), mirroring `shift`.
            attn_output = attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim)
            attn_output = torch.cat((
                attn_output[:, :, :self.num_heads//2], attn_output[:, :, self.num_heads//2:].roll(groupsz//2, dims=1)
            ), dim=2)

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
        attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value
|
102 |
+
|
103 |
+
|
104 |
+
class LlamaFlashAttention2(LlamaAttention):
    r"""
    LLaMA attention computed with the `flash_attn` kernels.

    Supports the same optional grouped ("shift short") path as the regular implementation:
    it is active when `config.group_size_ratio` is set and the module is in training mode.
    """

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        **kwargs
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
        r"""
        Computes causal self-attention over `hidden_states` of shape (bsz, q_len, hidden_size).

        Returns a tuple `(attn_output, None, past_key_value)`: attention weights are never
        returned and `past_key_value` is None unless `use_cache` is set.
        """
        # LlamaFlashAttention2 attention does not support output_attentions
        output_attentions = False

        bsz, q_len, _ = hidden_states.size()

        query_states = self.q_proj(hidden_states)
        key_states = self.k_proj(hidden_states)
        value_states = self.v_proj(hidden_states)

        # FlashAttention requires the input to have the shape (bsz, seq_len, n_heads, head_dim)
        # (the rotary embedding below is applied in the transposed layout first)
        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)

        # the rotary table has to cover the cached positions as well
        kv_seq_len = key_states.shape[-2]
        if past_key_value is not None:
            kv_seq_len += past_key_value[0].shape[-2]

        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)

        if past_key_value is not None: # reuse k, v, self_attention
            key_states = torch.cat([past_key_value[0], key_states], dim=2)
            value_states = torch.cat([past_key_value[1], value_states], dim=2)

        past_key_value = (key_states, value_states) if use_cache else None

        # cast to half precision
        input_dtype = query_states.dtype
        if input_dtype == torch.float32:
            logger.warning_once("The input hidden states seems to be silently casted in float32.")
            query_states = query_states.to(self.config.torch_dtype)
            key_states = key_states.to(self.config.torch_dtype)
            value_states = value_states.to(self.config.torch_dtype)

        if getattr(self, "num_key_value_groups", None):
            key_states = repeat_kv(key_states, self.num_key_value_groups)
            value_states = repeat_kv(value_states, self.num_key_value_groups)

        query_states = query_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim)
        key_states = key_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim)
        value_states = value_states.transpose(1, 2) # (bsz, seq_len, n_heads, head_dim)

        if getattr(self.config, "group_size_ratio", None) and self.training: # shift
            groupsz = int(q_len * getattr(self.config, "group_size_ratio"))
            assert q_len % groupsz == 0, "q_len {} should be divisible by group size {}.".format(q_len, groupsz)
            num_groups = q_len // groupsz
            def shift(state: torch.Tensor) -> torch.Tensor:
                # NOTE(review): `-groupsz//2` parses as `(-groupsz)//2`, which differs from
                # `-(groupsz//2)` for odd group sizes -- confirm group sizes are always even.
                state = torch.cat((
                    state[:, :, :self.num_heads//2], state[:, :, self.num_heads//2:].roll(-groupsz//2, dims=1)
                ), dim=2)
                return state.reshape(bsz * num_groups, groupsz, self.num_heads, self.head_dim)

            query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states)
            if attention_mask is not None:
                attention_mask = attention_mask.reshape(bsz * num_groups, groupsz)

        if attention_mask is not None:
            logger.warning_once("Padded sequences are less efficient in FlashAttention.")
            # -q_len: assumes left padding when q_len != kv_len
            unpadded_q, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input(query_states, attention_mask[:, -q_len:])
            unpadded_k, _, cu_seqlens_k, max_seqlen_k = unpad_input(key_states, attention_mask)
            unpadded_v, _, _, _ = unpad_input(value_states, attention_mask)
            attn_output_unpad = flash_attn_varlen_func(
                unpadded_q,
                unpadded_k,
                unpadded_v,
                cu_seqlens_q=cu_seqlens_q,
                cu_seqlens_k=cu_seqlens_k,
                max_seqlen_q=max_seqlen_q,
                max_seqlen_k=max_seqlen_k,
                dropout_p=0.0,
                softmax_scale=None,
                causal=True,
            )
            attn_output = pad_input(attn_output_unpad, indices_q, bsz, q_len)
        else:
            attn_output = flash_attn_func(
                query_states, key_states, value_states, 0.0, softmax_scale=None, causal=True
            )

        if getattr(self.config, "group_size_ratio", None) and self.training: # shift back
            # `reshape` is not in-place: the result must be assigned, and the two head
            # halves must be re-joined along the head axis (dim=2), mirroring `shift`.
            attn_output = attn_output.reshape(bsz, q_len, self.num_heads, self.head_dim)
            attn_output = torch.cat((
                attn_output[:, :, :self.num_heads//2], attn_output[:, :, self.num_heads//2:].roll(groupsz//2, dims=1)
            ), dim=2)

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
        attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value
|
210 |
+
|
211 |
+
|
212 |
+
# Disable the transformation of the attention mask in LlamaModel as flash attention
|
213 |
+
# takes a boolean padding_mask. Fills in the past kv length for use in forward.
|
214 |
+
def _prepare_decoder_attention_mask(
|
215 |
+
self,
|
216 |
+
attention_mask: torch.Tensor,
|
217 |
+
input_shape: torch.Tensor,
|
218 |
+
inputs_embeds: torch.Tensor,
|
219 |
+
past_key_values_length: int
|
220 |
+
) -> torch.Tensor:
|
221 |
+
if attention_mask is not None and torch.all(attention_mask):
|
222 |
+
return None # This uses the faster call when training with full samples
|
223 |
+
|
224 |
+
return attention_mask
|
LLM-Detector-V7-11w/src/llmtuner/extras/ploting.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import math
|
3 |
+
import json
|
4 |
+
from typing import List, Optional
|
5 |
+
from transformers.trainer import TRAINER_STATE_NAME
|
6 |
+
|
7 |
+
from llmtuner.extras.logging import get_logger
|
8 |
+
from llmtuner.extras.packages import is_matplotlib_available
|
9 |
+
|
10 |
+
if is_matplotlib_available():
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
|
13 |
+
|
14 |
+
logger = get_logger(__name__)
|
15 |
+
|
16 |
+
|
17 |
+
def smooth(scalars: List[float]) -> List[float]:
    r"""
    EMA implementation according to TensorBoard.

    The smoothing weight depends on the number of points: it is `1.8 * (sigmoid(0.05 * n) - 0.5)`,
    i.e. close to 0 for very short series and approaching 0.9 for long ones.
    Returns a new list of the same length; an empty input yields an empty list.
    """
    if not scalars:
        return []

    last = scalars[0]
    weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function
    smoothed = []
    for next_val in scalars:
        # each output blends the previous smoothed value with the current raw value
        last = last * weight + (1 - weight) * next_val
        smoothed.append(last)
    return smoothed
|
29 |
+
|
30 |
+
|
31 |
+
def plot_loss(save_dictionary: os.PathLike, keys: Optional[List[str]] = ["loss"]) -> None:
    r"""
    Plots the logged metrics found in the trainer state file of `save_dictionary`.

    For every key in `keys`, the values recorded in the state's `log_history` are drawn
    against their steps (raw and smoothed) and saved as `training_<key>.png` inside
    `save_dictionary`. Keys without any logged value are skipped with a warning.
    Passing `keys=None` is treated like the default `["loss"]`.
    """
    if keys is None:
        keys = ["loss"] # the default list is never mutated, so sharing it across calls is safe

    with open(os.path.join(save_dictionary, TRAINER_STATE_NAME), "r", encoding="utf-8") as f:
        data = json.load(f)

    for key in keys:
        steps, metrics = [], []
        for entry in data["log_history"]:
            if key in entry:
                steps.append(entry["step"])
                metrics.append(entry[key])

        if not metrics:
            logger.warning(f"No metric {key} to plot.")
            continue

        figure_path = os.path.join(save_dictionary, "training_{}.png".format(key))
        figure = plt.figure()
        plt.plot(steps, metrics, alpha=0.4, label="original")
        plt.plot(steps, smooth(metrics), label="smoothed")
        plt.title("training {} of {}".format(key, save_dictionary))
        plt.xlabel("step")
        plt.ylabel(key)
        plt.legend()
        plt.savefig(figure_path, format="png", dpi=100)
        plt.close(figure) # release the figure so repeated calls do not accumulate open figures
        print("Figure saved:", figure_path)
|
LLM-Detector-V7-11w/src/llmtuner/hparams/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .data_args import DataArguments
|
2 |
+
from .evaluation_args import EvaluationArguments
|
3 |
+
from .finetuning_args import FinetuningArguments
|
4 |
+
from .generating_args import GeneratingArguments
|
5 |
+
from .model_args import ModelArguments
|
LLM-Detector-V7-11w/src/llmtuner/hparams/data_args.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from typing import List, Literal, Optional
|
4 |
+
from dataclasses import dataclass, field
|
5 |
+
|
6 |
+
|
7 |
+
DATA_CONFIG = "dataset_info.json"
|
8 |
+
|
9 |
+
|
10 |
+
def use_modelscope() -> bool:
    r"""
    Tells whether the `USE_MODELSCOPE_HUB` environment variable holds a non-zero integer.
    """
    flag = os.environ.get("USE_MODELSCOPE_HUB", "0")
    return int(flag) != 0
|
12 |
+
|
13 |
+
|
14 |
+
@dataclass
class DatasetAttr:
    r"""
    Describes where one dataset is loaded from and which of its columns hold which content.
    """

    # source of the dataset and its identifier within that source
    load_from: Literal["hf_hub", "ms_hub", "script", "file"]
    dataset_name: Optional[str] = None
    dataset_sha1: Optional[str] = None
    subset: Optional[str] = None
    folder: Optional[str] = None
    ranking: Optional[bool] = False
    formatting: Optional[Literal["alpaca", "sharegpt"]] = "alpaca"

    # column names inside the dataset
    prompt: Optional[str] = "instruction"
    query: Optional[str] = "input"
    response: Optional[str] = "output"
    history: Optional[str] = None
    messages: Optional[str] = "conversations"
    role: Optional[str] = "from"
    content: Optional[str] = "value"
    system: Optional[str] = None

    def __repr__(self) -> str:
        # `dataset_name` may be None; __repr__ must return a str, otherwise repr() raises TypeError.
        return str(self.dataset_name)
|
36 |
+
|
37 |
+
|
38 |
+
@dataclass
class DataArguments:
    r"""
    Arguments pertaining to what data we are going to input our model for training and evaluation.
    """
    template: Optional[str] = field(
        default=None,
        metadata={"help": "Which template to use for constructing prompts in training and inference."}
    )
    dataset: Optional[str] = field(
        default=None,
        metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."}
    )
    dataset_dir: Optional[str] = field(
        default="data",
        metadata={"help": "Path to the folder containing the datasets."}
    )
    split: Optional[str] = field(
        default="train",
        metadata={"help": "Which dataset split to use for training and evaluation."}
    )
    cutoff_len: Optional[int] = field(
        default=1024,
        metadata={"help": "The maximum length of the model inputs after tokenization."}
    )
    reserved_label_len: Optional[int] = field(
        default=1,
        metadata={"help": "The maximum length reserved for label after tokenization."}
    )
    train_on_prompt: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether to disable the mask on the prompt or not."}
    )
    streaming: Optional[bool] = field(
        default=False,
        metadata={"help": "Enable dataset streaming."}
    )
    buffer_size: Optional[int] = field(
        default=16384,
        metadata={"help": "Size of the buffer to randomly sample examples from in dataset streaming."}
    )
    mix_strategy: Optional[Literal["concat", "interleave_under", "interleave_over"]] = field(
        default="concat",
        metadata={"help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling)."}
    )
    interleave_probs: Optional[str] = field(
        default=None,
        metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."}
    )
    overwrite_cache: Optional[bool] = field(
        default=False,
        metadata={"help": "Overwrite the cached training and evaluation sets."}
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."}
    )
    max_samples: Optional[int] = field(
        default=None,
        metadata={"help": "For debugging purposes, truncate the number of examples for each dataset."}
    )
    eval_num_beams: Optional[int] = field(
        default=None,
        metadata={"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`"}
    )
    ignore_pad_token_for_loss: Optional[bool] = field(
        default=True,
        metadata={"help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not."}
    )
    val_size: Optional[float] = field(
        default=0,
        metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."}
    )
    sft_packing: Optional[bool] = field(
        default=False,
        metadata={"help": "Packing the questions and answers in the supervised fine-tuning stage."}
    )
    cache_path: Optional[str] = field(
        default=None,
        metadata={"help": "Path to save or load the preprocessed datasets."}
    )

    def __post_init__(self):
        r"""
        Validates argument combinations; raises ValueError for inconsistent settings.
        """
        if self.reserved_label_len >= self.cutoff_len:
            raise ValueError("`reserved_label_len` must be smaller than `cutoff_len`.")

        # a fractional validation size cannot be used together with streaming
        if self.streaming and self.val_size > 1e-6 and self.val_size < 1:
            raise ValueError("Streaming mode should have an integer val size.")

        if self.streaming and self.max_samples is not None:
            raise ValueError("`max_samples` is incompatible with `streaming`.")

    def init_for_training(self, seed: int): # support mixing multiple datasets
        r"""
        Resolves the comma-separated `dataset` names into `self.dataset_list`.

        Stores `seed`, reads the dataset config file from `dataset_dir`, converts
        `interleave_probs` from a comma-separated string into a list of floats, and builds
        one `DatasetAttr` per requested dataset.

        Raises:
            ValueError: if datasets were requested but the config file cannot be read,
                or if a requested dataset is not defined in the config file.
        """
        self.seed = seed
        dataset_names = [ds.strip() for ds in self.dataset.split(",")] if self.dataset is not None else []
        try:
            # the config is JSON, so it is read as UTF-8 regardless of the platform locale
            with open(os.path.join(self.dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
                dataset_info = json.load(f)
        except Exception as err:
            # a missing or unreadable config is only an error when datasets were requested
            if self.dataset is not None:
                raise ValueError(
                    "Cannot open {} due to {}.".format(os.path.join(self.dataset_dir, DATA_CONFIG), str(err))
                ) from err
            dataset_info = None

        # parse only while the value is still a string, so that a second call does not fail
        if isinstance(self.interleave_probs, str):
            self.interleave_probs = [float(prob.strip()) for prob in self.interleave_probs.split(",")]

        self.dataset_list: List["DatasetAttr"] = []
        for name in dataset_names:
            if name not in dataset_info:
                raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG))

            info = dataset_info[name]
            has_hf_url = "hf_hub_url" in info
            has_ms_url = "ms_hub_url" in info

            if has_hf_url or has_ms_url:
                # ModelScope is used when it is enabled and has a URL, or when it is the only hub URL
                if (use_modelscope() and has_ms_url) or (not has_hf_url):
                    dataset_attr = DatasetAttr(
                        "ms_hub",
                        dataset_name=info["ms_hub_url"]
                    )
                else:
                    dataset_attr = DatasetAttr(
                        "hf_hub",
                        dataset_name=info["hf_hub_url"]
                    )
            elif "script_url" in info:
                dataset_attr = DatasetAttr(
                    "script",
                    dataset_name=info["script_url"]
                )
            else:
                dataset_attr = DatasetAttr(
                    "file",
                    dataset_name=info["file_name"],
                    dataset_sha1=info.get("file_sha1", None)
                )

            if "columns" in info:
                # once a "columns" mapping is present, every column not listed in it becomes None
                columns = info["columns"]
                dataset_attr.prompt = columns.get("prompt", None)
                dataset_attr.query = columns.get("query", None)
                dataset_attr.response = columns.get("response", None)
                dataset_attr.history = columns.get("history", None)
                dataset_attr.messages = columns.get("messages", None)
                dataset_attr.role = columns.get("role", None)
                dataset_attr.content = columns.get("content", None)
                dataset_attr.system = columns.get("system", None)

            dataset_attr.subset = info.get("subset", None)
            dataset_attr.folder = info.get("folder", None)
            dataset_attr.ranking = info.get("ranking", False)
            dataset_attr.formatting = info.get("formatting", "alpaca")
            self.dataset_list.append(dataset_attr)
|
LLM-Detector-V7-11w/src/llmtuner/hparams/evaluation_args.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Literal, Optional
|
3 |
+
from dataclasses import dataclass, field
|
4 |
+
|
5 |
+
from datasets import DownloadMode
|
6 |
+
|
7 |
+
|
8 |
+
@dataclass
class EvaluationArguments:
    r"""
    Settings that describe a single evaluation run.

    After construction, `__post_init__` checks that `task` names a
    sub-directory of `task_dir` and that `save_dir`, when given, does not
    exist yet.
    """
    task: str = field(
        metadata={"help": "Name of the evaluation task."},
    )
    task_dir: Optional[str] = field(
        default="evaluation",
        metadata={"help": "Path to the folder containing the evaluation datasets."},
    )
    batch_size: Optional[int] = field(
        default=4,
        metadata={"help": "The batch size per GPU for evaluation."},
    )
    seed: Optional[int] = field(
        default=42,
        metadata={"help": "Random seed to be used with data loaders."},
    )
    lang: Optional[Literal["en", "zh"]] = field(
        default="en",
        metadata={"help": "Language used at evaluation."},
    )
    n_shot: Optional[int] = field(
        default=5,
        metadata={"help": "Number of examplars for few-shot learning."},
    )
    save_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Path to save the evaluation results."},
    )
    download_mode: Optional[DownloadMode] = field(
        default=DownloadMode.REUSE_DATASET_IF_EXISTS,
        metadata={"help": "Download mode used for the evaluation datasets."},
    )

    def __post_init__(self):
        r"""
        Validates `task` and `save_dir`.

        Raises:
            ValueError: if `task` is not a sub-directory of `task_dir`, or if
                `save_dir` is set and already exists.
        """
        # Every directory entry directly under `task_dir` counts as an available task.
        task_available = [
            entry
            for entry in os.listdir(self.task_dir)
            if os.path.isdir(os.path.join(self.task_dir, entry))
        ]

        if self.task not in task_available:
            raise ValueError("Task {} not found in {}.".format(self.task, self.task_dir))

        # Refuse to reuse a path that is already present on disk.
        save_dir_taken = self.save_dir is not None and os.path.exists(self.save_dir)
        if save_dir_taken:
            raise ValueError("`save_dir` already exists, use another one.")
|
LLM-Detector-V7-11w/src/llmtuner/hparams/finetuning_args.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from typing import Literal, Optional
|
3 |
+
from dataclasses import asdict, dataclass, field
|
4 |
+
|
5 |
+
|
6 |
+
@dataclass
class FreezeArguments:
    r"""
    Options that control freeze (partial-parameter) fine-tuning.
    """
    name_module_trainable: Optional[str] = field(
        default="mlp",
        metadata={
            "help": (
                "Name of trainable modules for partial-parameter (freeze) fine-tuning. "
                "Use commas to separate multiple modules. "
                'LLaMA choices: ["mlp", "self_attn"], '
                'BLOOM & Falcon & ChatGLM choices: ["mlp", "self_attention"], '
                'Qwen choices: ["mlp", "attn"], '
                'Phi choices: ["mlp", "mixer"], '
                "Others choices: the same as LLaMA."
            )
        },
    )
    num_layer_trainable: Optional[int] = field(
        default=3,
        metadata={
            "help": "The number of trainable layers for partial-parameter (freeze) fine-tuning."
        },
    )
|
25 |
+
|
26 |
+
|
27 |
+
@dataclass
class LoraArguments:
    r"""
    Options that control LoRA fine-tuning.
    """
    additional_target: Optional[str] = field(
        default=None,
        metadata={
            "help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."
        },
    )
    lora_alpha: Optional[int] = field(
        default=None,
        metadata={"help": "The scale factor for LoRA fine-tuning (default: lora_rank * 2)."},
    )
    lora_dropout: Optional[float] = field(
        default=0.0,
        metadata={"help": "Dropout rate for the LoRA fine-tuning."},
    )
    lora_rank: Optional[int] = field(
        default=8,
        metadata={"help": "The intrinsic dimension for LoRA fine-tuning."},
    )
    lora_target: Optional[str] = field(
        default=None,
        metadata={
            "help": (
                "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. "
                'LLaMA choices: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], '
                'BLOOM & Falcon & ChatGLM choices: ["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"], '
                'Baichuan choices: ["W_pack", "o_proj", "gate_proj", "up_proj", "down_proj"], '
                'Qwen choices: ["c_attn", "attn.c_proj", "w1", "w2", "mlp.c_proj"], '
                'Phi choices: ["Wqkv", "out_proj", "fc1", "fc2"], '
                "Others choices: the same as LLaMA."
            )
        },
    )
    create_new_adapter: Optional[bool] = field(
        default=False,
        metadata={
            "help": "Whether to create a new adapter with randomly initialized weight or not."
        },
    )
|
62 |
+
|
63 |
+
|
64 |
+
@dataclass
class RLHFArguments:
    r"""
    Options that control PPO and DPO training.
    """
    dpo_beta: Optional[float] = field(
        default=0.1,
        metadata={"help": "The beta parameter for the DPO loss."},
    )
    dpo_loss: Optional[Literal["sigmoid", "hinge", "ipo", "kto"]] = field(
        default="sigmoid",
        metadata={"help": "The type of DPO loss to use."},
    )
    dpo_ftx: Optional[float] = field(
        default=0,
        metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."},
    )
    ppo_buffer_size: Optional[int] = field(
        default=1,
        metadata={
            "help": "The number of mini-batches to make experience buffer in a PPO optimization step."
        },
    )
    ppo_epochs: Optional[int] = field(
        default=4,
        metadata={"help": "The number of epochs to perform in a PPO optimization step."},
    )
    ppo_logger: Optional[str] = field(
        default=None,
        metadata={"help": 'Log with either "wandb" or "tensorboard" in PPO training.'},
    )
    ppo_score_norm: Optional[bool] = field(
        default=False,
        metadata={"help": "Use score normalization in PPO training."},
    )
    ppo_target: Optional[float] = field(
        default=6.0,
        metadata={"help": "Target KL value for adaptive KL control in PPO training."},
    )
    ppo_whiten_rewards: Optional[bool] = field(
        default=False,
        metadata={"help": "Whiten the rewards before compute advantages in PPO training."},
    )
    ref_model: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the reference model used for the PPO or DPO training."},
    )
    ref_model_adapters: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the adapters of the reference model."},
    )
    ref_model_quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the reference model."},
    )
    reward_model: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the reward model used for the PPO training."},
    )
    reward_model_adapters: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the adapters of the reward model."},
    )
    reward_model_quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the reward model."},
    )
    reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
        default="lora",
        metadata={
            "help": "The type of the reward model in PPO training. Lora model only supports lora training."
        },
    )
|
133 |
+
|
134 |
+
|
135 |
+
@dataclass
class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
    r"""
    Arguments pertaining to which techniques we are going to fine-tuning with.

    Combines the freeze, LoRA and RLHF option groups with the training stage
    and fine-tuning method. `__post_init__` normalises the comma-separated
    string options into lists and validates the configuration.
    """
    stage: Optional[Literal["pt", "sft", "rm", "ppo", "dpo"]] = field(
        default="sft",
        metadata={"help": "Which stage will be performed in training."}
    )
    finetuning_type: Optional[Literal["lora", "freeze", "full"]] = field(
        default="lora",
        metadata={"help": "Which fine-tuning method to use."}
    )
    plot_loss: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to save the training loss curves."}
    )

    def __post_init__(self):
        r"""
        Normalises list-like options and validates the configuration.

        Raises:
            ValueError: if the fine-tuning method or a quantization bit width
                is not supported, if PPO is requested without a reward model,
                or if a LoRA reward model is combined with non-LoRA PPO training.
        """
        def split_arg(arg):
            # Strings become lists of stripped items; anything else (None, or a
            # list restored by `load_from_json`) is passed through untouched.
            if isinstance(arg, str):
                return [item.strip() for item in arg.split(",")]
            return arg

        self.name_module_trainable = split_arg(self.name_module_trainable)
        # Falls back to twice the rank when no (truthy) alpha was supplied.
        self.lora_alpha = self.lora_alpha or self.lora_rank * 2
        self.lora_target = split_arg(self.lora_target)
        self.additional_target = split_arg(self.additional_target)
        self.ref_model_adapters = split_arg(self.ref_model_adapters)
        self.reward_model_adapters = split_arg(self.reward_model_adapters)

        # Explicit raises instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if self.finetuning_type not in ["lora", "freeze", "full"]:
            raise ValueError("Invalid fine-tuning method.")

        if self.ref_model_quantization_bit not in [None, 8, 4]:
            raise ValueError("We only accept 4-bit or 8-bit quantization.")

        if self.reward_model_quantization_bit not in [None, 8, 4]:
            raise ValueError("We only accept 4-bit or 8-bit quantization.")

        if self.stage == "ppo" and self.reward_model is None:
            raise ValueError("Reward model is necessary for PPO training.")

        if self.stage == "ppo" and self.reward_model_type == "lora" and self.finetuning_type != "lora":
            raise ValueError("Freeze/Full PPO training needs `reward_model_type=full`.")

    def save_to_json(self, json_path: str):
        r"""Saves the content of this instance in JSON format inside `json_path`."""
        json_string = json.dumps(asdict(self), indent=2, sort_keys=True) + "\n"
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(json_string)

    @classmethod
    def load_from_json(cls, json_path: str):
        r"""Creates an instance from the content of `json_path`."""
        with open(json_path, "r", encoding="utf-8") as f:
            text = f.read()

        return cls(**json.loads(text))
|
LLM-Detector-V7-11w/src/llmtuner/hparams/generating_args.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, Optional
|
2 |
+
from dataclasses import asdict, dataclass, field
|
3 |
+
|
4 |
+
|
5 |
+
@dataclass
class GeneratingArguments:
    r"""
    Arguments pertaining to specify the decoding parameters.
    """
    do_sample: Optional[bool] = field(
        default=True,
        metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."}
    )
    temperature: Optional[float] = field(
        default=0.95,
        metadata={"help": "The value used to modulate the next token probabilities."}
    )
    top_p: Optional[float] = field(
        default=0.7,
        metadata={"help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."}
    )
    top_k: Optional[int] = field(
        default=50,
        metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."}
    )
    num_beams: Optional[int] = field(
        default=1,
        metadata={"help": "Number of beams for beam search. 1 means no beam search."}
    )
    max_length: Optional[int] = field(
        default=512,
        metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."}
    )
    max_new_tokens: Optional[int] = field(
        default=512,
        metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."}
    )
    repetition_penalty: Optional[float] = field(
        default=1.0,
        metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."}
    )
    length_penalty: Optional[float] = field(
        default=1.0,
        metadata={"help": "Exponential penalty to the length that is used with beam-based generation."}
    )

    def to_dict(self) -> Dict[str, Any]:
        r"""
        Returns the arguments as a dict holding only one length limit.

        When `max_new_tokens` is a positive number it takes precedence and
        `max_length` is dropped; otherwise (zero, negative or `None`)
        `max_new_tokens` is dropped and `max_length` is kept.
        """
        args = asdict(self)
        max_new_tokens = args.get("max_new_tokens")
        # `max_new_tokens` is Optional: guard against None before comparing,
        # since `None > 0` raises TypeError.
        if max_new_tokens is not None and max_new_tokens > 0:
            args.pop("max_length", None)
        else:
            args.pop("max_new_tokens", None)
        return args
|
LLM-Detector-V7-11w/src/llmtuner/hparams/model_args.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, Literal, Optional
|
2 |
+
from dataclasses import asdict, dataclass, field
|
3 |
+
|
4 |
+
|
5 |
+
@dataclass
class ModelArguments:
    r"""
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune.

    `__post_init__` additionally creates the non-field attributes
    `compute_dtype` and `model_max_length` (both initialised to None),
    turns `adapter_name_or_path` into a list of paths and validates the
    quantization settings.
    """
    model_name_or_path: str = field(
        metadata={"help": "Path to the model weight or identifier from huggingface.co/models or modelscope.cn/models."}
    )
    adapter_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."}
    )
    use_fast_tokenizer: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."}
    )
    resize_vocab: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to resize the tokenizer vocab and the embedding layers."}
    )
    split_special_tokens: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not the special tokens should be split during the tokenization process."}
    )
    model_revision: Optional[str] = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}
    )
    quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the model."}
    )
    quantization_type: Optional[Literal["fp4", "nf4"]] = field(
        default="nf4",
        metadata={"help": "Quantization data type to use in int4 training."}
    )
    double_quantization: Optional[bool] = field(
        default=True,
        metadata={"help": "Whether or not to use double quantization in int4 training."}
    )
    rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
        default=None,
        metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."}
    )
    flash_attn: Optional[bool] = field(
        default=False,
        metadata={"help": "Enable FlashAttention-2 for faster training."}
    )
    shift_attn: Optional[bool] = field(
        default=False,
        metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."}
    )
    use_unsloth: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."}
    )
    disable_gradient_checkpointing: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to disable gradient checkpointing."}
    )
    upcast_layernorm: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to upcast the layernorm weights in fp32."}
    )
    hf_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with Hugging Face Hub."}
    )
    ms_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with ModelScope Hub."}
    )
    export_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the directory to save the exported model."}
    )
    export_size: Optional[int] = field(
        default=1,
        metadata={"help": "The file shard size (in GB) of the exported model."}
    )
    export_quantization_bit: Optional[int] = field(
        default=None,
        metadata={"help": "The number of bits to quantize the exported model."}
    )
    export_quantization_dataset: Optional[str] = field(
        default=None,
        metadata={"help": "Path to the dataset or dataset name to use in quantizing the exported model."}
    )
    export_quantization_nsamples: Optional[int] = field(
        default=128,
        metadata={"help": "The number of samples used for quantization."}
    )
    export_quantization_maxlen: Optional[int] = field(
        default=1024,
        metadata={"help": "The maximum length of the model inputs used for quantization."}
    )
    export_legacy_format: Optional[bool] = field(
        default=False,
        metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."}
    )

    def __post_init__(self):
        r"""
        Initialises derived attributes and validates the configuration.

        Raises:
            ValueError: if `split_special_tokens` is combined with a fast
                tokenizer, if a quantization bit width is unsupported, or if
                export quantization is requested without a dataset.
        """
        # Plain instance attributes, not dataclass fields, so `to_dict` omits them.
        self.compute_dtype = None
        self.model_max_length = None

        if self.split_special_tokens and self.use_fast_tokenizer:
            raise ValueError("`split_special_tokens` is only supported for slow tokenizers.")

        # Support merging multiple lora weights. Only strings are split, so an
        # instance rebuilt from `to_dict()` output (already a list) stays valid.
        if isinstance(self.adapter_name_or_path, str):
            self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")]

        # Explicit raises instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if self.quantization_bit not in [None, 8, 4]:
            raise ValueError("We only accept 4-bit or 8-bit quantization.")

        if self.export_quantization_bit not in [None, 8, 4, 3, 2]:
            raise ValueError("We only accept 2/3/4/8-bit quantization.")

        if self.export_quantization_bit is not None and self.export_quantization_dataset is None:
            raise ValueError("Quantization dataset is necessary for exporting.")

    def to_dict(self) -> Dict[str, Any]:
        r"""Returns the dataclass fields of this instance as a dict."""
        return asdict(self)
|
LLM-Detector-V7-11w/src/llmtuner/model/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Level: loader > adapter > parser, utils
|
2 |
+
|
3 |
+
from llmtuner.model.loader import load_model_and_tokenizer
|
4 |
+
from llmtuner.model.parser import get_train_args, get_infer_args, get_eval_args
|
5 |
+
from llmtuner.model.utils import dispatch_model, get_modelcard_args, load_valuehead_params
|
LLM-Detector-V7-11w/src/llmtuner/model/adapter.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from typing import TYPE_CHECKING
|
3 |
+
from transformers.integrations import is_deepspeed_zero3_enabled
|
4 |
+
from peft import PeftModel, TaskType, LoraConfig, get_peft_model
|
5 |
+
|
6 |
+
from llmtuner.extras.logging import get_logger
|
7 |
+
from llmtuner.model.utils import find_all_linear_modules
|
8 |
+
|
9 |
+
if TYPE_CHECKING:
|
10 |
+
from transformers.modeling_utils import PreTrainedModel
|
11 |
+
from llmtuner.hparams import ModelArguments, FinetuningArguments
|
12 |
+
|
13 |
+
|
14 |
+
# Module-level logger obtained from the project's `get_logger` helper.
logger = get_logger(__name__)
|
15 |
+
|
16 |
+
|
17 |
+
def init_adapter(
    model: "PreTrainedModel",
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    is_trainable: bool
) -> "PreTrainedModel":
    r"""
    Initializes the adapters.

    Support full-parameter, freeze and LoRA training.

    Note that the trainable parameters must be cast to float32.

    Args:
        model: the base model to prepare.
        model_args: provides `adapter_name_or_path`, `use_unsloth` and
            `model_max_length`.
        finetuning_args: provides the fine-tuning method and its options.
        is_trainable: whether the model is being prepared for training.

    Returns:
        The model, possibly wrapped as a PEFT model and/or with adapters merged.

    Raises:
        ValueError: if freeze tuning is requested but the layer count cannot
            be read from the model config, or if new LoRA weights must be
            created while `lora_target` is unset.
        NotImplementedError: if unsloth is enabled for a model type other
            than "llama" or "mistral".
    """

    if (not is_trainable) and model_args.adapter_name_or_path is None:
        logger.info("Adapter is not found at evaluation, load the base model.")
        return model

    if finetuning_args.finetuning_type == "full" and is_trainable:
        logger.info("Fine-tuning method: Full")
        model = model.float()

    if finetuning_args.finetuning_type == "freeze" and is_trainable:
        logger.info("Fine-tuning method: Freeze")
        # The layer count is stored under different config attribute names.
        num_layers = (
            getattr(model.config, "num_hidden_layers", None)
            or getattr(model.config, "num_layers", None)
            or getattr(model.config, "n_layer", None)
        )
        if not num_layers:
            raise ValueError("Current model does not support freeze tuning.")

        if finetuning_args.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0
            trainable_layer_ids = [num_layers - k - 1 for k in range(finetuning_args.num_layer_trainable)]
        else: # fine-tuning the first n layers if num_layer_trainable < 0
            trainable_layer_ids = [k for k in range(-finetuning_args.num_layer_trainable)]

        # The leading dot anchors the layer index: patterns are matched by
        # substring, so without it "1.mlp" would also match "...11.mlp" or
        # "...21.mlp" and unfreeze layers that were not requested.
        trainable_layers = [
            ".{:d}.{}".format(idx, module_name)
            for module_name in finetuning_args.name_module_trainable
            for idx in trainable_layer_ids
        ]

        for name, param in model.named_parameters():
            if not any(trainable_layer in name for trainable_layer in trainable_layers):
                param.requires_grad_(False)
            else:
                param.data = param.data.to(torch.float32)

    if finetuning_args.finetuning_type == "lora":
        logger.info("Fine-tuning method: LoRA")
        adapter_to_resume = None

        if model_args.adapter_name_or_path is not None:
            is_mergeable = True
            if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable
                assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter."
                is_mergeable = False

            if is_deepspeed_zero3_enabled():
                assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3."
                is_mergeable = False

            # Keep the last adapter un-merged when training continues on it,
            # or when merging is not possible at all.
            if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable):
                adapter_to_merge = model_args.adapter_name_or_path[:-1]
                adapter_to_resume = model_args.adapter_name_or_path[-1]
            else:
                adapter_to_merge = model_args.adapter_name_or_path

            for adapter in adapter_to_merge:
                model = PeftModel.from_pretrained(model, adapter)
                model = model.merge_and_unload()

            if len(adapter_to_merge) > 0:
                logger.info("Merged {} adapter(s).".format(len(adapter_to_merge)))

            if adapter_to_resume is not None: # resume lora training
                model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable)

        if is_trainable and adapter_to_resume is None: # create new lora weights while training
            if finetuning_args.lora_target is None:
                # Fail with a clear message instead of `TypeError` from `len(None)`.
                raise ValueError("Please specify `lora_target` in LoRA training.")

            if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all":
                target_modules = find_all_linear_modules(model)
            else:
                target_modules = finetuning_args.lora_target

            peft_kwargs = {
                "r": finetuning_args.lora_rank,
                "target_modules": target_modules,
                "lora_alpha": finetuning_args.lora_alpha,
                "lora_dropout": finetuning_args.lora_dropout
            }

            if model_args.use_unsloth:
                from unsloth import FastLlamaModel, FastMistralModel # type: ignore
                unsloth_peft_kwargs = {"model": model, "max_seq_length": model_args.model_max_length}
                if getattr(model.config, "model_type", None) == "llama":
                    model = FastLlamaModel.get_peft_model(**peft_kwargs, **unsloth_peft_kwargs)
                elif getattr(model.config, "model_type", None) == "mistral":
                    model = FastMistralModel.get_peft_model(**peft_kwargs, **unsloth_peft_kwargs)
                else:
                    raise NotImplementedError

            else:
                lora_config = LoraConfig(
                    task_type=TaskType.CAUSAL_LM,
                    inference_mode=False,
                    modules_to_save=finetuning_args.additional_target,
                    **peft_kwargs
                )
                model = get_peft_model(model, lora_config)

        # Upcast every parameter that still requires gradients to float32.
        for param in filter(lambda p: p.requires_grad, model.parameters()):
            param.data = param.data.to(torch.float32)

    if model_args.adapter_name_or_path is not None:
        logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path)))

    return model
|
LLM-Detector-V7-11w/src/llmtuner/model/loader.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import TYPE_CHECKING, Optional, Tuple
|
2 |
+
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
|
3 |
+
from transformers.integrations import is_deepspeed_zero3_enabled
|
4 |
+
from transformers.utils.versions import require_version
|
5 |
+
from trl import AutoModelForCausalLMWithValueHead
|
6 |
+
|
7 |
+
from llmtuner.extras.logging import get_logger
|
8 |
+
from llmtuner.extras.misc import count_parameters, get_current_device, try_download_model_from_ms
|
9 |
+
from llmtuner.model.adapter import init_adapter
|
10 |
+
from llmtuner.model.patcher import patch_config, patch_tokenizer, patch_model, patch_valuehead_model
|
11 |
+
from llmtuner.model.utils import load_valuehead_params, register_autoclass
|
12 |
+
|
13 |
+
if TYPE_CHECKING:
|
14 |
+
from transformers import PreTrainedModel, PreTrainedTokenizer
|
15 |
+
from llmtuner.hparams import ModelArguments, FinetuningArguments
|
16 |
+
|
17 |
+
|
18 |
+
# Module-level logger obtained from the project's `get_logger` helper.
logger = get_logger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
# Minimum dependency versions checked at import time. The requirement in each
# hint is quoted so that a shell does not treat ">" as an output redirection
# when the command is copy-pasted.
require_version("transformers>=4.36.2", 'To fix: pip install "transformers>=4.36.2"')
require_version("datasets>=2.14.3", 'To fix: pip install "datasets>=2.14.3"')
require_version("accelerate>=0.21.0", 'To fix: pip install "accelerate>=0.21.0"')
require_version("peft>=0.7.0", 'To fix: pip install "peft>=0.7.0"')
require_version("trl>=0.7.6", 'To fix: pip install "trl>=0.7.6"')
|
26 |
+
|
27 |
+
|
28 |
+
def load_model_and_tokenizer(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    is_trainable: Optional[bool] = False,
    add_valuehead: Optional[bool] = False
) -> Tuple["PreTrainedModel", "PreTrainedTokenizer"]:
    r"""
    Builds the tokenizer and the causal language model described by `model_args`.

    Works for both training (`is_trainable=True`) and inference. The steps are:
    tokenizer and config loading/patching, optional Unsloth loading, the regular
    `AutoModelForCausalLM` fallback, adapter initialization, optional value head
    wrapping, and finally switching the model to train or eval mode.

    Returns the `(model, tokenizer)` pair.
    """
    try_download_model_from_ms(model_args)

    # Shared by tokenizer, config and model loading; `patch_config` may add more keys to it.
    hub_kwargs = {
        "trust_remote_code": True,
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "token": model_args.hf_hub_token
    }

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path,
        use_fast=model_args.use_fast_tokenizer,
        split_special_tokens=model_args.split_special_tokens,
        padding_side="right",
        **hub_kwargs
    )
    patch_tokenizer(tokenizer)

    config = AutoConfig.from_pretrained(model_args.model_name_or_path, **hub_kwargs)
    patch_config(config, tokenizer, model_args, hub_kwargs, is_trainable)

    model = None
    if is_trainable and model_args.use_unsloth:
        require_version("unsloth", "Follow the instructions at: https://github.com/unslothai/unsloth")
        from unsloth import FastLlamaModel, FastMistralModel  # type: ignore

        unsloth_kwargs = {
            "model_name": model_args.model_name_or_path,
            "max_seq_length": model_args.model_max_length,
            "dtype": model_args.compute_dtype,
            "load_in_4bit": model_args.quantization_bit == 4,
            "token": model_args.hf_hub_token,
            "device_map": get_current_device(),
            "rope_scaling": getattr(config, "rope_scaling", None)
        }

        # Only these architectures have an Unsloth loader; anything else falls back below.
        model_type = getattr(config, "model_type", None)
        fast_loaders = {"llama": FastLlamaModel, "mistral": FastMistralModel}
        if model_type in fast_loaders:
            model, _ = fast_loaders[model_type].from_pretrained(**unsloth_kwargs)
        else:
            logger.warning("Unsloth does not support model type {}.".format(model_type))
            model_args.use_unsloth = False

        if model_args.adapter_name_or_path:
            model_args.adapter_name_or_path = None
            logger.warning("Unsloth does not support loading adapters.")

    if model is None:
        model = AutoModelForCausalLM.from_pretrained(
            model_args.model_name_or_path,
            config=config,
            torch_dtype=model_args.compute_dtype,
            low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()),
            **hub_kwargs
        )

    patch_model(model, tokenizer, model_args, is_trainable)
    register_autoclass(config, model, tokenizer)

    model = init_adapter(model, model_args, finetuning_args, is_trainable)

    if add_valuehead:
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model)
        patch_valuehead_model(model)

        # The value head is looked up next to the last adapter, else next to the base model.
        adapter_paths = model_args.adapter_name_or_path
        vhead_path = adapter_paths[-1] if adapter_paths is not None else model_args.model_name_or_path

        vhead_params = load_valuehead_params(vhead_path, model_args)
        if vhead_params is not None:
            model.load_state_dict(vhead_params, strict=False)
            logger.info("Loaded valuehead from checkpoint: {}".format(vhead_path))

    if is_trainable:
        model.train()
    else:
        model.requires_grad_(False)
        # Models carrying a `quantization_method` are left in their loaded dtype.
        if not getattr(model, "quantization_method", None):
            model = model.to(model_args.compute_dtype)
        model.eval()

    trainable_params, all_param = count_parameters(model)
    logger.info("trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format(
        trainable_params, all_param, 100 * trainable_params / all_param
    ))

    if not is_trainable:
        logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.")

    return model, tokenizer
|
LLM-Detector-V7-11w/src/llmtuner/model/parser.py
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import torch
|
4 |
+
import logging
|
5 |
+
import datasets
|
6 |
+
import transformers
|
7 |
+
from typing import Any, Dict, Optional, Tuple
|
8 |
+
from transformers import HfArgumentParser, Seq2SeqTrainingArguments
|
9 |
+
from transformers.trainer_utils import get_last_checkpoint
|
10 |
+
|
11 |
+
from llmtuner.extras.logging import get_logger
|
12 |
+
from llmtuner.hparams import (
|
13 |
+
ModelArguments,
|
14 |
+
DataArguments,
|
15 |
+
EvaluationArguments,
|
16 |
+
FinetuningArguments,
|
17 |
+
GeneratingArguments
|
18 |
+
)
|
19 |
+
|
20 |
+
|
21 |
+
logger = get_logger(__name__)
|
22 |
+
|
23 |
+
|
24 |
+
# Dataclasses handed to `HfArgumentParser` for each entry point (`*_ARGS`) and the
# matching tuple type of the parsed result (`*_CLS`). The order of each pair must agree.
_TRAIN_ARGS = [
    ModelArguments,
    DataArguments,
    Seq2SeqTrainingArguments,
    FinetuningArguments,
    GeneratingArguments,
]
_TRAIN_CLS = Tuple[
    ModelArguments,
    DataArguments,
    Seq2SeqTrainingArguments,
    FinetuningArguments,
    GeneratingArguments,
]

_INFER_ARGS = [
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    GeneratingArguments,
]
_INFER_CLS = Tuple[
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    GeneratingArguments,
]

_EVAL_ARGS = [
    ModelArguments,
    DataArguments,
    EvaluationArguments,
    FinetuningArguments,
]
_EVAL_CLS = Tuple[
    ModelArguments,
    DataArguments,
    EvaluationArguments,
    FinetuningArguments,
]
|
42 |
+
|
43 |
+
|
44 |
+
def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
|
45 |
+
if args is not None:
|
46 |
+
return parser.parse_dict(args)
|
47 |
+
|
48 |
+
if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
|
49 |
+
return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))
|
50 |
+
|
51 |
+
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
52 |
+
return parser.parse_json_file(os.path.abspath(sys.argv[1]))
|
53 |
+
|
54 |
+
(*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)
|
55 |
+
|
56 |
+
if unknown_args:
|
57 |
+
print(parser.format_help())
|
58 |
+
print("Got unknown args, potentially deprecated arguments: {}".format(unknown_args))
|
59 |
+
raise ValueError("Some specified arguments are not used by the HfArgumentParser: {}".format(unknown_args))
|
60 |
+
|
61 |
+
return (*parsed_args,)
|
62 |
+
|
63 |
+
|
64 |
+
def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
    r"""
    Applies `log_level` to the `datasets` and `transformers` loggers and turns on
    the default handler and explicit format of the `transformers` logger.
    """
    for hf_logging in (datasets.utils.logging, transformers.utils.logging):
        hf_logging.set_verbosity(log_level)

    transformers_logging = transformers.utils.logging
    transformers_logging.enable_default_handler()
    transformers_logging.enable_explicit_format()
|
69 |
+
|
70 |
+
|
71 |
+
def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
|
72 |
+
if model_args.quantization_bit is not None:
|
73 |
+
if finetuning_args.finetuning_type != "lora":
|
74 |
+
raise ValueError("Quantization is only compatible with the LoRA method.")
|
75 |
+
|
76 |
+
if finetuning_args.create_new_adapter:
|
77 |
+
raise ValueError("Cannot create new adapter upon a quantized model.")
|
78 |
+
|
79 |
+
if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
|
80 |
+
if finetuning_args.finetuning_type != "lora":
|
81 |
+
raise ValueError("Multiple adapters are only available for LoRA tuning.")
|
82 |
+
|
83 |
+
if model_args.quantization_bit is not None:
|
84 |
+
raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
|
85 |
+
|
86 |
+
|
87 |
+
def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    r"""
    Parses the training argument groups listed in `_TRAIN_ARGS`.
    """
    return _parse_args(HfArgumentParser(_TRAIN_ARGS), args)
|
90 |
+
|
91 |
+
|
92 |
+
def _parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    r"""
    Parses the inference argument groups listed in `_INFER_ARGS`.
    """
    return _parse_args(HfArgumentParser(_INFER_ARGS), args)
|
95 |
+
|
96 |
+
|
97 |
+
def _parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    r"""
    Parses the evaluation argument groups listed in `_EVAL_ARGS`.
    """
    return _parse_args(HfArgumentParser(_EVAL_ARGS), args)
|
100 |
+
|
101 |
+
|
102 |
+
def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
    r"""
    Parses, validates and post-processes the arguments of a training run.

    After parsing, this function:
    (1) sets up logging and calls `data_args.init_for_training`;
    (2) rejects argument combinations that the selected stage does not support;
    (3) warns about discouraged settings;
    (4) adjusts `training_args` (DDP unused-parameter detection, checkpoint resuming);
    (5) derives `model_args.compute_dtype` and `model_args.model_max_length`;
    (6) seeds the random number generators.

    Returns the `(model_args, data_args, training_args, finetuning_args, generating_args)` tuple.

    Raises:
        ValueError: on an unsupported argument combination, or when the output
            directory is non-empty, holds no checkpoint and may not be overwritten.
    """
    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    data_args.init_for_training(training_args.seed)

    if finetuning_args.stage != "pt" and data_args.template is None:
        raise ValueError("Please specify which `template` to use.")

    if finetuning_args.stage != "sft" and training_args.predict_with_generate:
        raise ValueError("`predict_with_generate` cannot be set as True except SFT.")

    if finetuning_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate:
        raise ValueError("Please enable `predict_with_generate` to save model predictions.")

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo" and not training_args.do_train:
        raise ValueError("PPO training does not support evaluation, use the SFT stage to evaluate models.")

    if finetuning_args.stage in ["rm", "dpo"] and (not all([data_attr.ranking for data_attr in data_args.dataset_list])):
        raise ValueError("Please use ranked datasets for reward modeling or DPO training.")

    if finetuning_args.stage == "ppo" and model_args.shift_attn:
        raise ValueError("PPO training is incompatible with S^2-Attn.")

    if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
        raise ValueError("Unsloth does not support lora reward model.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and training_args.predict_with_generate:
        raise ValueError("`predict_with_generate` cannot be set as True while training.")

    if training_args.do_train and finetuning_args.finetuning_type == "lora" and finetuning_args.lora_target is None:
        raise ValueError("Please specify `lora_target` in LoRA training.")

    _verify_model_args(model_args, finetuning_args)

    if training_args.do_train and model_args.quantization_bit is not None and (not model_args.upcast_layernorm):
        logger.warning("We recommend enable `upcast_layernorm` in quantized training.")

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning("We recommend enable mixed precision training.")

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning("Evaluating model in 4/8-bit mode may cause lower scores.")

    if (not training_args.do_train) and finetuning_args.stage == "dpo" and finetuning_args.ref_model is None:
        logger.warning("Specify `ref_model` for computing rewards at evaluation.")

    # postprocess training_args
    #
    # Fields are updated in place rather than by rebuilding the object from
    # `training_args.to_dict()`: `to_dict()` is a serialization helper that
    # obfuscates `*_token` fields, so a rebuilt object would lose the hub tokens.
    if (
        training_args.local_rank != -1
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.warning("`ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.")
        training_args.ddp_find_unused_parameters = False

    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in ["full", "freeze"]:
        can_resume_from_checkpoint = False
        training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    # Resume automatically from the newest checkpoint found in an existing output directory.
    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.")

        if last_checkpoint is not None:
            training_args.resume_from_checkpoint = last_checkpoint
            logger.info("Resuming training from {}. Change `output_dir` or use `overwrite_output_dir` to avoid.".format(
                training_args.resume_from_checkpoint
            ))

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning("Add {} to `adapter_name_or_path` to resume training from checkpoint.".format(
            training_args.resume_from_checkpoint
        ))

    # postprocess model_args
    # `None` leaves the dtype undecided when neither bf16 nor fp16 is requested.
    model_args.compute_dtype = (
        torch.bfloat16 if training_args.bf16 else (torch.float16 if training_args.fp16 else None)
    )
    model_args.model_max_length = data_args.cutoff_len

    # Log on each process the small summary:
    logger.info("Process rank: {}, device: {}, n_gpu: {}\n  distributed training: {}, compute dtype: {}".format(
        training_args.local_rank, training_args.device, training_args.n_gpu,
        bool(training_args.local_rank != -1), str(model_args.compute_dtype)
    ))
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args
|
221 |
+
|
222 |
+
|
223 |
+
def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
    r"""
    Parses and validates the arguments of an inference run.

    Raises:
        ValueError: if no `template` is given or the model arguments are inconsistent.
    """
    parsed = _parse_infer_args(args)
    model_args, data_args, finetuning_args, generating_args = parsed
    _set_transformers_logging()

    template_missing = data_args.template is None
    if template_missing:
        raise ValueError("Please specify which `template` to use.")

    _verify_model_args(model_args, finetuning_args)

    return model_args, data_args, finetuning_args, generating_args
|
233 |
+
|
234 |
+
|
235 |
+
def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
    r"""
    Parses and validates the arguments of an evaluation run, then seeds the RNGs
    with `eval_args.seed`.

    Raises:
        ValueError: if no `template` is given or the model arguments are inconsistent.
    """
    parsed = _parse_eval_args(args)
    model_args, data_args, eval_args, finetuning_args = parsed
    _set_transformers_logging()

    template_missing = data_args.template is None
    if template_missing:
        raise ValueError("Please specify which `template` to use.")

    _verify_model_args(model_args, finetuning_args)
    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args
|
LLM-Detector-V7-11w/src/llmtuner/model/patcher.py
ADDED
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import math
|
3 |
+
import torch
|
4 |
+
import random
|
5 |
+
from types import MethodType
|
6 |
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
7 |
+
from datasets import load_dataset
|
8 |
+
|
9 |
+
from transformers import BitsAndBytesConfig, GPTQConfig, PreTrainedModel, PreTrainedTokenizerBase
|
10 |
+
from transformers.integrations import is_deepspeed_zero3_enabled
|
11 |
+
from transformers.utils.versions import require_version
|
12 |
+
|
13 |
+
from llmtuner.extras.constants import FILEEXT2TYPE, LAYERNORM_NAMES
|
14 |
+
from llmtuner.extras.logging import get_logger
|
15 |
+
from llmtuner.extras.misc import get_current_device, infer_optim_dtype
|
16 |
+
from llmtuner.extras.packages import is_flash_attn2_available
|
17 |
+
|
18 |
+
if TYPE_CHECKING:
|
19 |
+
from transformers import PretrainedConfig, PreTrainedTokenizer
|
20 |
+
from trl import AutoModelForCausalLMWithValueHead
|
21 |
+
from llmtuner.hparams import ModelArguments
|
22 |
+
|
23 |
+
|
24 |
+
logger = get_logger(__name__)
|
25 |
+
SUPPORTED_CLASS_FOR_S2ATTN = [] # TODO: add llama
|
26 |
+
|
27 |
+
|
28 |
+
def _noisy_mean_initialization(embed_weight: torch.Tensor, num_new_tokens: int):
|
29 |
+
embedding_dim = embed_weight.size(1)
|
30 |
+
avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
|
31 |
+
noise_weight = torch.empty_like(avg_weight[-num_new_tokens:])
|
32 |
+
noise_weight.normal_(mean=0, std=(1.0 / math.sqrt(embedding_dim)))
|
33 |
+
embed_weight[-num_new_tokens:] = avg_weight + noise_weight
|
34 |
+
|
35 |
+
|
36 |
+
def _resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer") -> None:
    r"""
    Grows the token embeddings when the tokenizer holds more entries than the model.

    The embeddings are resized to `len(tokenizer)` padded to a multiple of 64, and the
    added rows of the input and output embeddings get a noisy-mean initialization.
    Nothing is changed if the output embedding is not a `torch.nn.Linear`.
    """
    old_size = model.get_input_embeddings().weight.size(0)
    if len(tokenizer) <= old_size:
        return

    if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
        logger.warning("Current model does not support resizing token embeddings.")
        return

    model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
    new_size = model.get_input_embeddings().weight.size(0)
    added_rows = new_size - old_size

    # Input embeddings first, then output embeddings.
    for get_embeddings in (model.get_input_embeddings, model.get_output_embeddings):
        _noisy_mean_initialization(get_embeddings().weight.data, added_rows)

    logger.info("Resized token embeddings from {} to {}.".format(old_size, new_size))
|
53 |
+
|
54 |
+
|
55 |
+
def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]:
    r"""
    Builds the calibration texts used for GPTQ export.

    Draws `model_args.export_quantization_nsamples` random windows of
    `model_args.export_quantization_maxlen` tokens from the "text" column of the
    "train" split of `model_args.export_quantization_dataset` and decodes each
    window back to a string.

    Inspired by: https://github.com/huggingface/optimum/blob/v1.16.0/optimum/gptq/data.py#L133
    TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600
    """
    if os.path.isfile(model_args.export_quantization_dataset):
        # Local file: the loader type is looked up from the file extension.
        data_path = FILEEXT2TYPE.get(model_args.export_quantization_dataset.split(".")[-1], None)
        data_files = model_args.export_quantization_dataset
    else:
        data_path = model_args.export_quantization_dataset
        data_files = None

    dataset = load_dataset(path=data_path, data_files=data_files, split="train", cache_dir=model_args.cache_dir)
    maxlen = model_args.export_quantization_maxlen

    samples = []
    for _ in range(model_args.export_quantization_nsamples):
        # Re-draw until a document with at least `maxlen` tokens is found.
        # NOTE: this never terminates if no document is long enough.
        while True:
            sample_idx = random.randint(0, len(dataset) - 1)
            sample: Dict[str, torch.Tensor] = tokenizer(dataset[sample_idx]["text"], return_tensors="pt")
            if sample["input_ids"].size(1) >= maxlen:
                break  # TODO: fix large maxlen

        # `random.randint` includes both ends, so the last valid start offset is
        # `length - maxlen`. Subtracting one more raises ValueError for a document
        # of exactly `maxlen` tokens and never selects the final window.
        word_idx = random.randint(0, sample["input_ids"].size(1) - maxlen)
        input_ids = sample["input_ids"][:, word_idx : word_idx + maxlen]
        samples.append(tokenizer.decode(input_ids[0].tolist(), skip_special_tokens=True))

    return samples
|
83 |
+
|
84 |
+
|
85 |
+
def _configure_rope(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
    r"""
    Writes a `rope_scaling` entry of the form `{"type": ..., "factor": ...}` into `config`.

    For training the factor is `model_max_length / max_position_embeddings` rounded up,
    or 1.0 when the requested length does not exceed the model's; for inference it is 2.0.
    Configs without a `rope_scaling` attribute are left untouched.
    """
    if not hasattr(config, "rope_scaling"):
        logger.warning("Current model does not support RoPE scaling.")
        return

    scaling_factor = 2.0  # used as is when the model is not trainable
    if is_trainable:
        if model_args.rope_scaling == "dynamic":
            logger.warning(
                "Dynamic NTK scaling may not work well with fine-tuning. "
                "See: https://github.com/huggingface/transformers/pull/24653"
            )

        native_length = getattr(config, "max_position_embeddings", None)
        exceeds_native = native_length and model_args.model_max_length > native_length
        if not exceeds_native:
            logger.warning("Input length is smaller than max length. Consider increase input length.")
            scaling_factor = 1.0
        else:
            scaling_factor = float(math.ceil(model_args.model_max_length / native_length))

    config.rope_scaling = {"type": model_args.rope_scaling, "factor": scaling_factor}
    logger.info("Using {} scaling strategy and setting scaling factor to {}".format(
        model_args.rope_scaling, scaling_factor
    ))
|
110 |
+
|
111 |
+
|
112 |
+
def _configure_flashattn(config_kwargs: Dict[str, Any]) -> None:
    r"""
    Requests FlashAttention-2 through `config_kwargs` when the package is available,
    and only logs a warning otherwise.
    """
    if is_flash_attn2_available():
        config_kwargs["use_flash_attention_2"] = True
        logger.info("Using FlashAttention-2 for faster training and inference.")
    else:
        logger.warning("FlashAttention2 is not installed.")
|
119 |
+
|
120 |
+
|
121 |
+
def _configure_longlora(config: "PretrainedConfig") -> None:
    r"""
    Sets `group_size_ratio` to 0.25 on configs whose `model_type` is listed in
    `SUPPORTED_CLASS_FOR_S2ATTN`; other model types only trigger a warning.
    """
    model_type = getattr(config, "model_type", None)
    if model_type not in SUPPORTED_CLASS_FOR_S2ATTN:
        logger.warning("Current model does not support shift short attention.")
        return

    config.group_size_ratio = 0.25
    logger.info("Using shift short attention with group_size_ratio=1/4.")
|
127 |
+
|
128 |
+
|
129 |
+
def _configure_quantization(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    config_kwargs: Dict[str, Any]
) -> None:
    r"""
    Fills `config_kwargs` with the quantization settings used to load the model.

    Priority: GPTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training)

    Raises:
        ValueError: if DeepSpeed ZeRO-3 is enabled together with a quantized model,
            or if AutoGPTQ export is requested for a ChatGLM model.
    """
    preset_config: Dict[str, Any] = getattr(config, "quantization_config", None)

    if preset_config:  # gptq: the checkpoint is already quantized
        if is_deepspeed_zero3_enabled():
            raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")

        config_kwargs["device_map"] = {"": get_current_device()}
        is_gptq = preset_config.get("quant_method", None) == "gptq"
        if is_gptq and preset_config.get("bits", -1) == 4:
            preset_config["use_exllama"] = False  # disable exllama
        logger.info("Loading {}-bit GPTQ-quantized model.".format(preset_config.get("bits", -1)))
        return

    if model_args.export_quantization_bit is not None:  # auto-gptq: quantize while exporting
        require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
        require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0")
        from accelerate.utils import get_max_memory

        if getattr(config, "model_type", None) == "chatglm":
            raise ValueError("ChatGLM model is not supported.")

        config_kwargs["quantization_config"] = GPTQConfig(
            bits=model_args.export_quantization_bit,
            tokenizer=tokenizer,
            dataset=_get_quantization_dataset(tokenizer, model_args)
        )
        config_kwargs["device_map"] = "auto"
        config_kwargs["max_memory"] = get_max_memory()
        logger.info("Quantizing model to {} bit.".format(model_args.export_quantization_bit))
        return

    if model_args.quantization_bit is None:
        return

    # bnb: quantize on the fly with bitsandbytes
    if is_deepspeed_zero3_enabled():
        raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")

    bits = model_args.quantization_bit
    if bits == 8:
        require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
        config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
    elif bits == 4:
        require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
        config_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=model_args.compute_dtype,
            bnb_4bit_use_double_quant=model_args.double_quantization,
            bnb_4bit_quant_type=model_args.quantization_type
        )

    # Any other bit width sets no `quantization_config` but still reaches the lines below.
    config_kwargs["device_map"] = {"": get_current_device()}
    logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
|
184 |
+
|
185 |
+
|
186 |
+
def _prepare_model_for_training(
    model: "PreTrainedModel",
    model_args: "ModelArguments",
    output_layer_name: Optional[str] = "lm_head"
) -> None:
    r"""
    Prepares a freshly loaded model for training. Includes:
        (1) cast the layernorm in fp32 (if `upcast_layernorm` is set)
        (2) enable gradient checkpointing and input gradients (unless disabled)
        (3) cast the output of the `output_layer_name` layer to fp32
    Inspired by: https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/other.py#L72
    """
    if model_args.upcast_layernorm:
        for param_name, param in model.named_parameters():
            is_layernorm = param.ndim == 1 and any(ln_name in param_name for ln_name in LAYERNORM_NAMES)
            if is_layernorm:
                param.data = param.data.to(torch.float32)
        logger.info("Upcasting layernorm weights in float32.")

    if not model_args.disable_gradient_checkpointing:
        if getattr(model, "supports_gradient_checkpointing", False):
            model.enable_input_require_grads()
            model.gradient_checkpointing_enable()
            model.config.use_cache = False  # turn off when gradient checkpointing is enabled
            logger.info("Gradient checkpointing enabled.")
        else:
            logger.warning("Current model does not support gradient checkpointing.")

    def _cast_output_to_fp32(module: torch.nn.Module, args: Tuple[torch.Tensor], output: torch.Tensor):
        return output.to(torch.float32)

    # A missing attribute yields None, which fails the Linear check below.
    output_layer = getattr(model, output_layer_name, None)
    if isinstance(output_layer, torch.nn.Linear) and output_layer.weight.dtype != torch.float32:
        output_layer.register_forward_hook(_cast_output_to_fp32)
|
220 |
+
|
221 |
+
|
222 |
+
def patch_tokenizer(tokenizer: "PreTrainedTokenizer") -> None:
    r"""
    Rebinds `tokenizer._pad` to `PreTrainedTokenizerBase._pad` when the tokenizer
    currently uses a different `_pad` implementation.
    """
    current_pad = str(tokenizer._pad.__func__)
    if "PreTrainedTokenizerBase" in current_pad:
        return

    tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)
|
225 |
+
|
226 |
+
|
227 |
+
def patch_config(
    config: "PretrainedConfig",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    config_kwargs: Dict[str, Any],
    is_trainable: bool
) -> None:
    r"""
    Adjusts `config`, `config_kwargs` and `model_args` before the model is loaded.

    Resolves the compute dtype when it is unset, mirrors it into the Qwen dtype
    flags, and applies the RoPE scaling, FlashAttention, shift short attention and
    quantization settings requested in `model_args`.
    """
    if model_args.compute_dtype is None:  # priority: bf16 > fp16 > fp32
        model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))

    if getattr(config, "model_type", None) == "qwen":
        # One boolean flag per dtype; only the flag matching the compute dtype is True.
        qwen_dtype_flags = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}
        for flag_name, flag_dtype in qwen_dtype_flags.items():
            setattr(config, flag_name, model_args.compute_dtype == flag_dtype)

    if model_args.rope_scaling is not None:
        _configure_rope(config, model_args, is_trainable)

    if model_args.flash_attn:
        _configure_flashattn(config_kwargs)

    if is_trainable and model_args.shift_attn:
        _configure_longlora(config)

    _configure_quantization(config, tokenizer, model_args, config_kwargs)
|
251 |
+
|
252 |
+
|
253 |
+
def patch_model(
    model: "PreTrainedModel",
    tokenizer: "PreTrainedTokenizer",
    model_args: "ModelArguments",
    is_trainable: bool
) -> None:
    r"""
    Applies post-loading fixes to `model`.

    Rebinds `generate` when it is not the `GenerationMixin` one, exposes the ChatGLM
    output layer as `lm_head`, optionally resizes the vocabulary and, for trainable
    models, runs the training preparation.

    Raises:
        ValueError: if vocab resizing is requested while DeepSpeed ZeRO-3 is enabled.
    """
    generate_impl = str(model.generate.__func__)
    if "GenerationMixin" not in generate_impl:
        model.generate = MethodType(PreTrainedModel.generate, model)

    if getattr(model.config, "model_type", None) == "chatglm":
        model.lm_head = model.transformer.output_layer
        model._keys_to_ignore_on_save = ["lm_head.weight"]

    if model_args.resize_vocab:
        if is_deepspeed_zero3_enabled():
            raise ValueError("DeepSpeed ZeRO-3 is incompatible with vocab resizing.")

        _resize_embedding_layer(model, tokenizer)

    if is_trainable:
        _prepare_model_for_training(model, model_args)
|
274 |
+
|
275 |
+
|
276 |
+
def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None:
    r"""
    Attaches `tie_weights` and `get_input_embeddings` methods that delegate to the
    wrapped `pretrained_model`, and lists every parameter whose name contains
    "pretrained_model" in `_keys_to_ignore_on_save`.
    """
    def _tie_weights(self: "AutoModelForCausalLMWithValueHead") -> None:
        backbone = self.pretrained_model
        if isinstance(backbone, PreTrainedModel):
            backbone.tie_weights()

    def _get_input_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module:
        backbone = self.pretrained_model
        if isinstance(backbone, PreTrainedModel):
            return backbone.get_input_embeddings()

    backbone_keys = []
    for param_name, _ in model.named_parameters():
        if "pretrained_model" in param_name:
            backbone_keys.append(param_name)

    setattr(model, "_keys_to_ignore_on_save", backbone_keys)
    setattr(model, "tie_weights", MethodType(_tie_weights, model))
    setattr(model, "get_input_embeddings", MethodType(_get_input_embeddings, model))
|
LLM-Detector-V7-11w/src/llmtuner/model/utils.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import inspect
|
3 |
+
from typing import TYPE_CHECKING, Any, Dict, List
|
4 |
+
from transformers import PreTrainedModel
|
5 |
+
from transformers.utils import cached_file
|
6 |
+
|
7 |
+
from llmtuner.extras.constants import V_HEAD_WEIGHTS_NAME, V_HEAD_SAFE_WEIGHTS_NAME
|
8 |
+
from llmtuner.extras.logging import get_logger
|
9 |
+
from llmtuner.extras.misc import get_current_device
|
10 |
+
|
11 |
+
if TYPE_CHECKING:
|
12 |
+
from transformers import PretrainedConfig, PreTrainedTokenizer
|
13 |
+
from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments
|
14 |
+
|
15 |
+
|
16 |
+
logger = get_logger(__name__)
|
17 |
+
|
18 |
+
|
19 |
+
def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel":
    r"""
    Places a pre-trained model on the available device(s).

    With more than one CUDA device (and a splittable, non-chatglm transformers model)
    the model is spread over the GPUs using accelerate's balanced-memory device map.
    Otherwise it is moved to the current device. Models that carry a truthy
    `quantization_method` attribute are returned untouched.

    Borrowed from: https://github.com/huggingface/transformers/blob/v4.36.2/src/transformers/modeling_utils.py#L3570
    """
    if getattr(model, "quantization_method", None): # already set on current device
        return model

    can_shard = (
        torch.cuda.device_count() > 1
        and isinstance(model, PreTrainedModel)
        and model._no_split_modules is not None
        and model.config.model_type != "chatglm"
    )
    if not can_shard:
        return model.to(device=get_current_device())

    # Imported lazily; aliased so it does not shadow this function's own name.
    from accelerate import dispatch_model as accelerate_dispatch
    from accelerate.utils import get_balanced_memory, infer_auto_device_map

    shard_kwargs = {"dtype": model.dtype, "no_split_module_classes": model._get_no_split_modules("auto")}
    memory_budget = get_balanced_memory(model, **shard_kwargs)
    # Make sure tied weights are tied before creating the device map.
    model.tie_weights()
    placement = infer_auto_device_map(model, max_memory=memory_budget, **shard_kwargs)

    dispatch_kwargs = {"device_map": placement}
    # `skip_keys` is only passed when the installed accelerate accepts it.
    if "skip_keys" in inspect.signature(accelerate_dispatch).parameters:
        dispatch_kwargs["skip_keys"] = model._skip_keys_device_placement
    return accelerate_dispatch(model, **dispatch_kwargs)
|
47 |
+
|
48 |
+
|
49 |
+
def find_all_linear_modules(model: "PreTrainedModel") -> List[str]:
    r"""
    Finds all available modules to apply lora.

    Collects the last dotted component of every linear module's name, skipping
    modules whose full name contains an output-layer name ("lm_head", plus
    "output_layer" for chatglm models).

    Returns:
        The unique module names in sorted order, so that the result (and the log
        line) is reproducible across runs instead of depending on set ordering.

    Raises:
        ValueError: if the model reports a quantization method other than "bitsandbytes".
    """
    quantization_method = getattr(model, "quantization_method", None)
    if quantization_method is None:
        linear_cls = torch.nn.Linear
    elif quantization_method == "bitsandbytes":
        import bitsandbytes as bnb
        linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt
    else:
        raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method))

    output_layer_names = ["lm_head"]
    if model.config.model_type == "chatglm":
        output_layer_names.append("output_layer")

    unique_names = {
        name.split(".")[-1]
        for name, module in model.named_modules()
        if isinstance(module, linear_cls)
        and not any(output_layer in name for output_layer in output_layer_names)
    }
    # Sort once: set iteration order is not stable between interpreter runs.
    module_names = sorted(unique_names)

    logger.info("Found linear modules: {}".format(",".join(module_names)))
    return module_names
|
76 |
+
|
77 |
+
|
78 |
+
def get_modelcard_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    finetuning_args: "FinetuningArguments"
) -> Dict[str, Any]:
    r"""
    Builds the keyword arguments used for the model card.

    The dataset entry is the comma-separated `data_args.dataset` string split into
    stripped names; the "lora" tag is added only for lora fine-tuning.
    """
    card: Dict[str, Any] = {"tasks": "text-generation", "license": "other"}
    card["finetuned_from"] = model_args.model_name_or_path
    card["dataset"] = [dataset.strip() for dataset in data_args.dataset.split(",")]

    tags = ["llama-factory"]
    if finetuning_args.finetuning_type == "lora":
        tags = tags + ["lora"]
    card["tags"] = tags
    return card
|
90 |
+
|
91 |
+
|
92 |
+
def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Dict[str, torch.Tensor]:
    r"""
    Loads value head parameters from Hugging Face Hub or local disk.

    The safetensors file is tried first, then the regular PyTorch weights file.
    Every failure is logged at info level and the next option is tried.

    Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`,
    or None when neither file could be loaded.
    """
    hub_kwargs = dict(
        path_or_repo_id=path_or_repo_id,
        cache_dir=model_args.cache_dir,
        token=model_args.hf_hub_token
    )

    # First choice: safetensors (an import failure is treated like any other load failure).
    try:
        from safetensors import safe_open
        weights_path = cached_file(filename=V_HEAD_SAFE_WEIGHTS_NAME, **hub_kwargs)
        with safe_open(weights_path, framework="pt", device="cpu") as reader:
            return {key: reader.get_tensor(key) for key in reader.keys()}
    except Exception as err:
        logger.info("Failed to load {}: {}".format(V_HEAD_SAFE_WEIGHTS_NAME, str(err)))

    # Fallback: pickled PyTorch checkpoint.
    # NOTE(review): torch.load unpickles the file and `path_or_repo_id` may name a remote
    # repo -- consider `weights_only=True` if the supported torch versions allow it.
    try:
        weights_path = cached_file(filename=V_HEAD_WEIGHTS_NAME, **hub_kwargs)
        return torch.load(weights_path, map_location="cpu")
    except Exception as err:
        logger.info("Failed to load {}: {}".format(V_HEAD_WEIGHTS_NAME, str(err)))

    logger.info("Provided path ({}) does not contain value head weights.".format(path_or_repo_id))
    logger.info("Ignore these messages if you are not resuming the training of a value head model.")
    return None
|
121 |
+
|
122 |
+
|
123 |
+
def register_autoclass(config: "PretrainedConfig", model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer"):
    r"""
    Registers the config, model and tokenizer classes for auto classes.

    A class is registered only when the matching entry ("AutoConfig",
    "AutoModelForCausalLM", "AutoTokenizer") is present in the relevant `auto_map`:
    the config's attribute for the first two, the tokenizer's init kwargs for the last.
    """
    config_backed = (("AutoConfig", config), ("AutoModelForCausalLM", model))
    for auto_name, instance in config_backed:
        if auto_name in getattr(config, "auto_map", {}):
            instance.__class__.register_for_auto_class()

    tokenizer_auto_map = tokenizer.init_kwargs.get("auto_map", {})
    if "AutoTokenizer" in tokenizer_auto_map:
        tokenizer.__class__.register_for_auto_class()
|
LLM-Detector-V7-11w/src/llmtuner/train/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.train.tuner import export_model, run_exp
|
LLM-Detector-V7-11w/src/llmtuner/train/dpo/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.train.dpo.workflow import run_dpo
|
LLM-Detector-V7-11w/src/llmtuner/train/dpo/collator.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from typing import Any, Dict, List, Sequence, Tuple
|
4 |
+
from transformers import DataCollatorForSeq2Seq
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
class DPODataCollatorWithPadding(DataCollatorForSeq2Seq):
    r"""
    Data collator for pairwise data.
    """

    def _pad_labels(self, batch: torch.Tensor, positions: List[Tuple[int, int]]) -> torch.Tensor:
        r"""
        Derives labels from padded input ids.

        Each row is filled with `label_pad_token_id` except for its answer span, which
        is copied from the input ids. The span is located from the (prompt_len,
        answer_len) pair and the tokenizer's padding side.
        """
        pads_on_left = self.tokenizer.padding_side == "left"
        label_rows = []
        for row, (prompt_len, answer_len) in zip(batch, positions):
            row_len = row.size(0)
            if pads_on_left:
                # Answer occupies the tail of a left-padded row.
                span = slice(row_len - answer_len, row_len)
            else:
                # Answer follows the prompt directly in a right-padded row.
                span = slice(prompt_len, prompt_len + answer_len)
            labels = torch.ones_like(row) * self.label_pad_token_id
            labels[span] = row[span]
            label_rows.append(labels)
        return torch.stack(label_rows, dim=0).contiguous() # in contiguous memory

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        r"""
        Pads batched data to the longest sequence in the batch.

        We generate 2 * n examples where the first n examples represent chosen examples and
        the last n examples represent rejected examples.
        """
        paired_inputs = []
        answer_spans = []
        for answer_key in ("chosen_ids", "rejected_ids"):
            for example in features:
                prompt_ids = example["prompt_ids"]
                answer_ids = example[answer_key]
                prompt_len, answer_len = len(prompt_ids), len(answer_ids)
                paired_inputs.append({
                    "input_ids": prompt_ids + answer_ids,
                    "attention_mask": [1] * (prompt_len + answer_len)
                })
                answer_spans.append((prompt_len, answer_len))

        batch = self.tokenizer.pad(
            paired_inputs,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=self.return_tensors,
        )
        batch["labels"] = self._pad_labels(batch["input_ids"], answer_spans)
        return batch
|
LLM-Detector-V7-11w/src/llmtuner/train/dpo/trainer.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from collections import defaultdict
|
3 |
+
from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union
|
4 |
+
from transformers import BatchEncoding, Trainer
|
5 |
+
from trl import DPOTrainer
|
6 |
+
from trl.trainer.utils import disable_dropout_in_model
|
7 |
+
|
8 |
+
from llmtuner.extras.constants import IGNORE_INDEX
|
9 |
+
|
10 |
+
if TYPE_CHECKING:
|
11 |
+
from transformers import PreTrainedModel
|
12 |
+
|
13 |
+
|
14 |
+
class CustomDPOTrainer(DPOTrainer):
    r"""
    DPO trainer that can add a supervised loss on the chosen responses (weighted by `ftx_gamma`).
    """

    def __init__(
        self,
        beta: float,
        loss_type: Literal["sigmoid", "hinge", "ipo", "kto"],
        ftx_gamma: float,
        model: Union["PreTrainedModel", torch.nn.Module],
        ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None,
        disable_dropout: Optional[bool] = True,
        **kwargs
    ):
        r"""
        Sets the trainer attributes by hand and then runs `Trainer.__init__` directly
        (`DPOTrainer.__init__` is not called). Remaining keyword arguments are passed
        to `Trainer.__init__`.
        """
        if disable_dropout:
            disable_dropout_in_model(model)
            if ref_model is not None:
                disable_dropout_in_model(ref_model)

        # Flags and constants set up front, before the base Trainer is initialised.
        self.use_dpo_data_collator = True # hack to avoid warning
        self.generate_during_eval = False # disable at evaluation
        self.label_pad_token_id = IGNORE_INDEX
        self.padding_value = 0
        self.is_encoder_decoder = model.config.is_encoder_decoder
        self.precompute_ref_log_probs = False
        self._precomputed_train_ref_log_probs = False
        self._precomputed_eval_ref_log_probs = False
        self._peft_has_been_casted_to_bf16 = False

        # Loss configuration.
        self.ref_model = ref_model
        self.beta = beta
        self.label_smoothing = 0
        self.loss_type = loss_type
        self.ftx_gamma = ftx_gamma
        self._stored_metrics = defaultdict(lambda: defaultdict(list))

        Trainer.__init__(self, model=model, **kwargs)
        if not hasattr(self, "accelerator"):
            raise AttributeError("Please update `transformers`.")

        if ref_model is None:
            return

        if self.is_deepspeed_enabled:
            is_quantized = (
                getattr(ref_model, "is_loaded_in_8bit", False)
                or getattr(ref_model, "is_loaded_in_4bit", False)
            )
            if not is_quantized: # quantized models are already set on the correct device
                self.ref_model = self._prepare_deepspeed(self.ref_model)
        else:
            self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

    def sft_loss(
        self,
        chosen_logits: torch.FloatTensor,
        chosen_labels: torch.LongTensor
    ) -> torch.Tensor:
        r"""
        Computes supervised cross-entropy loss of given labels under the given logits.

        Returns:
            A tensor of shape (batch_size,) containing the cross-entropy loss of each samples.
        """
        avg_logps = self.get_batch_logps(chosen_logits, chosen_labels, average_log_prob=True)
        return -avg_logps

    def concatenated_forward(
        self,
        model: "PreTrainedModel",
        batch: Dict[str, torch.Tensor]
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
        r"""
        Runs one forward pass over the concatenated batch and splits the results in half.

        Returns:
            (chosen_logps, rejected_logps, chosen_logits, rejected_logits), where the first
            half of the batch is treated as chosen and the second half as rejected.
        """
        detached = {name: tensor.detach().clone() for name, tensor in batch.items()}
        batch_copied = BatchEncoding(detached) # avoid error

        outputs = model(
            input_ids=batch_copied["input_ids"],
            attention_mask=batch_copied["attention_mask"],
            return_dict=True
        )
        all_logits = outputs.logits.to(torch.float32)
        all_logps = self.get_batch_logps(all_logits, batch["labels"], average_log_prob=False)

        half = batch["input_ids"].size(0) // 2
        chosen_logps, rejected_logps = all_logps.split(half, dim=0)
        chosen_logits, rejected_logits = all_logits.split(half, dim=0)
        return chosen_logps, rejected_logps, chosen_logits, rejected_logits

    def get_batch_loss_metrics(
        self,
        model: "PreTrainedModel",
        batch: Dict[str, torch.Tensor],
        train_eval: Optional[Literal["train", "eval"]] = "train"
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        r"""
        Computes the DPO loss and other metrics for the given batch of inputs for train or test.
        """
        (
            policy_chosen_logps,
            policy_rejected_logps,
            policy_chosen_logits,
            policy_rejected_logits,
        ) = self.concatenated_forward(model, batch)

        # Reference log-probs never need gradients.
        with torch.no_grad():
            if self.ref_model is not None:
                ref_outputs = self.concatenated_forward(self.ref_model, batch)
            else:
                # No separate reference model: run the policy with its adapter disabled.
                with self.accelerator.unwrap_model(self.model).disable_adapter():
                    ref_outputs = self.concatenated_forward(self.model, batch)
            reference_chosen_logps, reference_rejected_logps, _, _ = ref_outputs

        losses, chosen_rewards, rejected_rewards = self.dpo_loss(
            policy_chosen_logps,
            policy_rejected_logps,
            reference_chosen_logps,
            reference_rejected_logps,
        )
        if self.ftx_gamma > 1e-6:
            half = batch["input_ids"].size(0) // 2
            chosen_labels, _ = batch["labels"].split(half, dim=0)
            losses += self.ftx_gamma * self.sft_loss(policy_chosen_logits, chosen_labels)

        reward_accuracies = (chosen_rewards > rejected_rewards).float()

        prefix = "eval_" if train_eval == "eval" else ""
        metrics = {}
        metrics[f"{prefix}rewards/chosen"] = chosen_rewards.cpu().mean()
        metrics[f"{prefix}rewards/rejected"] = rejected_rewards.cpu().mean()
        metrics[f"{prefix}rewards/accuracies"] = reward_accuracies.cpu().mean()
        metrics[f"{prefix}rewards/margins"] = (chosen_rewards - rejected_rewards).cpu().mean()
        metrics[f"{prefix}logps/rejected"] = policy_rejected_logps.detach().cpu().mean()
        metrics[f"{prefix}logps/chosen"] = policy_chosen_logps.detach().cpu().mean()
        metrics[f"{prefix}logits/rejected"] = policy_rejected_logits.detach().cpu().mean()
        metrics[f"{prefix}logits/chosen"] = policy_chosen_logits.detach().cpu().mean()

        return losses.mean(), metrics
|
LLM-Detector-V7-11w/src/llmtuner/train/dpo/workflow.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py
|
2 |
+
|
3 |
+
from typing import TYPE_CHECKING, Optional, List
|
4 |
+
from transformers import Seq2SeqTrainingArguments
|
5 |
+
|
6 |
+
from llmtuner.data import get_dataset, preprocess_dataset, split_dataset
|
7 |
+
from llmtuner.extras.constants import IGNORE_INDEX
|
8 |
+
from llmtuner.extras.ploting import plot_loss
|
9 |
+
from llmtuner.hparams import ModelArguments
|
10 |
+
from llmtuner.model import load_model_and_tokenizer
|
11 |
+
from llmtuner.train.dpo.collator import DPODataCollatorWithPadding
|
12 |
+
from llmtuner.train.dpo.trainer import CustomDPOTrainer
|
13 |
+
from llmtuner.train.utils import create_modelcard_and_push, create_ref_model
|
14 |
+
|
15 |
+
if TYPE_CHECKING:
|
16 |
+
from transformers import TrainerCallback
|
17 |
+
from llmtuner.hparams import DataArguments, FinetuningArguments
|
18 |
+
|
19 |
+
|
20 |
+
def run_dpo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None
):
    r"""
    Runs the DPO workflow: load data and model, build the trainer, then optionally
    train, evaluate and finally create the model card.
    """
    dataset = get_dataset(model_args, data_args)
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
    dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="rm")

    label_pad_token_id = IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id
    data_collator = DPODataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=8,
        label_pad_token_id=label_pad_token_id
    )

    # Create reference model
    reuse_policy_as_ref = finetuning_args.ref_model is None and (not training_args.do_train)
    ref_model = model if reuse_policy_as_ref else create_ref_model(model_args, finetuning_args)

    # Update arguments
    training_args_dict = training_args.to_dict()
    training_args_dict["remove_unused_columns"] = False # important for pairwise dataset
    training_args = Seq2SeqTrainingArguments(**training_args_dict)

    # Initialize our Trainer
    trainer = CustomDPOTrainer(
        beta=finetuning_args.dpo_beta,
        loss_type=finetuning_args.dpo_loss,
        ftx_gamma=finetuning_args.dpo_ftx,
        model=model,
        ref_model=ref_model,
        args=training_args,
        tokenizer=tokenizer,
        data_collator=data_collator,
        callbacks=callbacks,
        **split_dataset(dataset, data_args, training_args)
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        if model is ref_model: # unable to compute rewards without a reference model
            reward_keys = [key for key in metrics.keys() if "rewards" in key]
            for key in reward_keys:
                metrics.pop(key)
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
|
LLM-Detector-V7-11w/src/llmtuner/train/ppo/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.train.ppo.workflow import run_ppo
|
LLM-Detector-V7-11w/src/llmtuner/train/ppo/trainer.py
ADDED
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import math
|
4 |
+
import torch
|
5 |
+
from tqdm import tqdm
|
6 |
+
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
|
7 |
+
|
8 |
+
from transformers import GenerationConfig, Trainer, TrainerState, TrainerControl
|
9 |
+
from transformers.utils import WEIGHTS_NAME, SAFE_WEIGHTS_NAME
|
10 |
+
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
11 |
+
from transformers.trainer_pt_utils import remove_dummy_checkpoint
|
12 |
+
|
13 |
+
from trl import PPOTrainer
|
14 |
+
from trl.core import PPODecorators, logprobs_from_logits
|
15 |
+
|
16 |
+
from llmtuner.extras.callbacks import LogCallback, FixValueHeadModelCallback
|
17 |
+
from llmtuner.extras.logging import get_logger
|
18 |
+
from llmtuner.extras.misc import AverageMeter, count_parameters, get_logits_processor
|
19 |
+
from llmtuner.train.ppo.utils import dump_layernorm, get_rewards_from_server, restore_layernorm, replace_model
|
20 |
+
|
21 |
+
if TYPE_CHECKING:
|
22 |
+
from transformers import Seq2SeqTrainingArguments, TrainerCallback
|
23 |
+
from trl import AutoModelForCausalLMWithValueHead
|
24 |
+
from llmtuner.hparams import ModelArguments, FinetuningArguments, GeneratingArguments
|
25 |
+
|
26 |
+
|
27 |
+
logger = get_logger(__name__)
|
28 |
+
|
29 |
+
|
30 |
+
class CustomPPOTrainer(PPOTrainer, Trainer):
|
31 |
+
r"""
|
32 |
+
Inherits PPOTrainer.
|
33 |
+
"""
|
34 |
+
|
35 |
+
def __init__(
    self,
    model_args: "ModelArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: List["TrainerCallback"],
    reward_model: "AutoModelForCausalLMWithValueHead",
    **kwargs
):
    r"""
    Initialises the PPO trainer, then stores the arguments, builds the generation
    config and prepares the reward model when its type is "full".

    `callbacks` must hold a LogCallback followed by a FixValueHeadModelCallback.
    Remaining keyword arguments are passed to `PPOTrainer.__init__`.
    """
    PPOTrainer.__init__(self, **kwargs)

    self.args = training_args
    self.model_args = model_args
    self.finetuning_args = finetuning_args
    self.reward_model = reward_model

    pad_token_id = self.tokenizer.pad_token_id
    # Generation stops on the eos token or any additional special token.
    stop_token_ids = [self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids
    self.generation_config = GenerationConfig(
        pad_token_id=pad_token_id,
        eos_token_id=stop_token_ids,
        **generating_args.to_dict()
    )

    self.state = TrainerState()
    self.control = TrainerControl()
    self.is_deepspeed_enabled = self.accelerator.distributed_type == "DEEPSPEED" and hasattr(
        self.accelerator.state, "deepspeed_plugin"
    )
    self.log_callback = callbacks[0]
    self.save_callback = callbacks[1]
    assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, FixValueHeadModelCallback)

    if self.args.max_steps > 0:
        logger.info("max_steps is given, it will override any value given in num_train_epochs")

    if finetuning_args.reward_model_type != "full":
        return

    if self.is_deepspeed_enabled:
        is_quantized = (
            getattr(reward_model.pretrained_model, "is_loaded_in_8bit", False)
            or getattr(reward_model.pretrained_model, "is_loaded_in_4bit", False)
        )
        if not is_quantized: # quantized models are already set on the correct device
            self.reward_model = self._prepare_deepspeed(self.reward_model)
    else:
        self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True)
|
78 |
+
|
79 |
+
def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None:
    r"""
    Implements training loop for the PPO stage, like _inner_training_loop() in Huggingface's Trainer.

    Each step draws one batch (restarting the dataloader when it is exhausted), generates
    responses and rewards mini-batch by mini-batch in inference mode, runs one PPO step
    in training mode, and then handles logging and checkpointing.

    Args:
        resume_from_checkpoint: must be None; resuming is not supported yet.

    Raises:
        ValueError: if `resume_from_checkpoint` is given.
    """
    if resume_from_checkpoint is not None:
        raise ValueError("`resume_from_checkpoint` will be supported in the future version.")

    total_train_batch_size = (
        self.args.per_device_train_batch_size
        * self.args.gradient_accumulation_steps
        * self.finetuning_args.ppo_buffer_size
        * self.args.world_size
    )
    if self.args.max_steps > 0:
        # A positive max_steps takes precedence over the epoch count.
        num_examples = total_train_batch_size * self.args.max_steps
        num_train_epochs = sys.maxsize
        max_steps = self.args.max_steps
        steps_in_epoch = self.args.max_steps
    else:
        len_dataloader = len(self.dataloader)
        num_examples = len(self.dataset)
        num_train_epochs = self.args.num_train_epochs
        max_steps = math.ceil(num_train_epochs * len_dataloader)
        steps_in_epoch = len_dataloader

    self.state.max_steps = max_steps
    self.state.num_train_epochs = num_train_epochs
    self.state.is_local_process_zero = self.is_local_process_zero()
    self.state.is_world_process_zero = self.is_world_process_zero()

    if self.is_world_process_zero():
        logger.info("***** Running training *****")
        logger.info("  Num examples = {}".format(num_examples))
        logger.info("  Num Epochs = {}".format(num_train_epochs))
        logger.info("  Instantaneous batch size per device = {}".format(self.args.per_device_train_batch_size))
        logger.info("  Total train batch size (w. parallel, buffer, distributed & accumulation) = {}".format(
            total_train_batch_size
        ))
        logger.info("  Gradient Accumulation steps = {}".format(self.args.gradient_accumulation_steps))
        logger.info("  Num optimization epochs per batch = {}".format(self.finetuning_args.ppo_epochs))
        logger.info("  Total training steps = {}".format(max_steps))
        logger.info("  Number of trainable parameters = {}".format(count_parameters(self.model)[0]))

    unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
    dataiter = iter(self.dataloader)
    loss_meter = AverageMeter()
    reward_meter = AverageMeter()
    self.log_callback.on_train_begin(self.args, self.state, self.control)

    for step in tqdm(range(max_steps), disable=not self.is_local_process_zero()):
        try:
            batch = next(dataiter)
        except StopIteration:
            # Dataloader exhausted: start another pass over the data.
            dataiter = iter(self.dataloader)
            batch = next(dataiter)

        # Cast to inference mode
        unwrapped_model.gradient_checkpointing_disable()
        unwrapped_model.config.use_cache = True
        self.model.eval()

        # Get inputs
        self.tokenizer.padding_side = "right" # change padding side
        queries, responses, rewards = [], [], []
        for idx in range(0, self.config.batch_size, self.config.mini_batch_size):
            mini_batch_queries, mini_batch_responses = self.get_inputs(batch[idx:idx+self.config.mini_batch_size])
            mini_batch_rewards = self.get_rewards(mini_batch_queries, mini_batch_responses, unwrapped_model)
            queries.extend(mini_batch_queries)
            responses.extend(mini_batch_responses)
            rewards.extend(mini_batch_rewards)

        # Cast to training mode
        unwrapped_model.gradient_checkpointing_enable()
        unwrapped_model.config.use_cache = False
        self.model.train()

        # Run PPO step
        stats = self.step(queries, responses, rewards)
        self.tokenizer.padding_side = "left" # restore padding side
        loss_meter.update(float(stats["ppo/loss/total"]), n=len(rewards))
        reward_meter.update(torch.stack(rewards).mean().item(), n=len(rewards))

        if self.config.log_with is not None:
            # Stats logging is best-effort, but only ordinary errors are swallowed:
            # KeyboardInterrupt / SystemExit must still be able to stop training.
            try:
                batch["query"] = self.tokenizer.batch_decode(queries, skip_special_tokens=True)
                batch["response"] = self.tokenizer.batch_decode(responses, skip_special_tokens=True)
                self.log_stats(stats, batch, rewards)
            except Exception:
                logger.warning("Failed to save stats due to unknown errors.")

        self.state.global_step += 1
        self.log_callback.on_step_end(self.args, self.state, self.control)

        if self.is_local_process_zero() and (step+1) % self.args.logging_steps == 0:
            logs = dict(
                loss=round(loss_meter.avg, 4),
                reward=round(reward_meter.avg, 4),
                learning_rate=stats["ppo/learning_rate"],
                epoch=round(step / steps_in_epoch, 2)
            )
            tqdm.write(str(logs))
            logs["step"] = step
            self.state.log_history.append(logs)
            self.log_callback.on_log(self.args, self.state, self.control)
            # Meters are averaged per logging window.
            loss_meter.reset()
            reward_meter.reset()

        if (step+1) % self.args.save_steps == 0: # save checkpoint
            self.save_model(os.path.join(
                self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step)
            ))
            self.save_callback.on_save(
                self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model)
            )

        if self.control.should_epoch_stop or self.control.should_training_stop:
            break

    self.log_callback.on_train_end(self.args, self.state, self.control)
    self.save_callback.on_train_end(
        self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model)
    )
|
201 |
+
|
202 |
+
@torch.no_grad()
def get_inputs(self, batch: Dict[str, torch.Tensor]) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    r"""
    Generates model's responses given queries.

    Returns:
        Two lists of CPU tensors (queries, responses): each query has its leading pad
        tokens removed, each response its trailing pad tokens removed.
    """
    upcast_layernorm = self.model_args.upcast_layernorm
    pad_id = self.tokenizer.pad_token_id

    if upcast_layernorm:
        layernorm_params = dump_layernorm(self.model)

    if batch["input_ids"].size(0) == 1: # handle llama2 ppo with gradient accumulation > 1
        # Drop the leading padding columns from every tensor in the batch.
        start_index = (batch["input_ids"][0] != pad_id).nonzero()[0].item()
        for k, v in batch.items():
            batch[k] = v[:, start_index:]

    unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
    generate_output: torch.Tensor = unwrapped_model.generate(
        generation_config=self.generation_config,
        logits_processor=get_logits_processor(),
        **batch
    )

    if upcast_layernorm:
        restore_layernorm(self.model, layernorm_params)

    prompt_width = batch["input_ids"].size(-1)
    query = batch["input_ids"].detach().cpu()
    response = generate_output[:, prompt_width:].detach().cpu()

    queries, responses = [], []
    for query_row, response_row in zip(query, response):
        first_token = (query_row != pad_id).nonzero()[0].item()
        non_pad_positions = (response_row != pad_id).nonzero()

        if len(non_pad_positions) == 0:
            keep = 1 # allow empty response
        else:
            keep = non_pad_positions[-1].item() + 1

        queries.append(query_row[first_token:]) # remove padding from left
        responses.append(response_row[:keep]) # remove padding from right

    return queries, responses
|
241 |
+
|
242 |
+
@torch.no_grad()
def get_rewards(
    self,
    queries: List[torch.Tensor],
    responses: List[torch.Tensor],
    unwrapped_model: "AutoModelForCausalLMWithValueHead"
) -> List[torch.Tensor]:
    r"""
    Computes scores using given reward model.

    Both inputs and outputs are put on CPU.

    Depending on ``finetuning_args.reward_model_type`` the scores come from a remote
    server ("api"), from this trainer's own model with the reward adapter and value
    head swapped in ("lora"), or from ``self.reward_model`` (anything else). For the
    local paths, the reward of a sample is the value at its last non-pad position.
    """
    reward_type = self.finetuning_args.reward_model_type

    if reward_type == "api":
        token_ids = [torch.cat(pair, dim=-1).tolist() for pair in zip(queries, responses)]
        messages = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return get_rewards_from_server(self.reward_model, messages)

    uses_lora_reward = reward_type == "lora"
    if uses_lora_reward:
        replace_model(unwrapped_model, target="reward")
        scorer = self.model
    else:
        scorer = self.reward_model

    batch = self.prepare_model_inputs(queries, responses)

    with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype):  # support bf16
        _, _, values = scorer(**batch, output_hidden_states=True, return_dict=True)

    if getattr(unwrapped_model.config, "model_type", None) == "chatglm":  # assume same architecture
        values = torch.transpose(values, 0, 1)

    pad_id = self.tokenizer.pad_token_id
    rewards = []
    for row, row_values in enumerate(values):
        non_pad = (batch["input_ids"][row] != pad_id).nonzero()
        last_token = non_pad[-1].item() if len(non_pad) else 0
        rewards.append(row_values[last_token].float().detach().cpu())  # use fp32 type

    if uses_lora_reward:
        # switch the adapter and value head back to the policy
        replace_model(unwrapped_model, target="default")

    return rewards
|
283 |
+
|
284 |
+
@PPODecorators.empty_device_cache()
def batched_forward_pass(
    self,
    model: "AutoModelForCausalLMWithValueHead",
    queries: torch.Tensor,
    responses: torch.Tensor,
    model_inputs: dict,
    return_logits: Optional[bool] = False,
    response_masks: Optional[torch.Tensor] = None
):
    r"""
    Calculates model outputs in multiple batches.

    Subclass and override to inject custom behavior.

    The inputs are processed in chunks of ``self.config.mini_batch_size`` samples.

    Args:
        model: the model to run; called as ``model(**inputs)`` and expected to return
            a ``(logits, _, values)`` triple.
        queries: per-sample query token sequences (only their lengths are used here).
        responses: per-sample response token sequences (only their lengths are used here).
        model_inputs: keyword inputs for the model; must contain ``input_ids`` and
            ``attention_mask`` and is sliced along the first dimension per chunk.
        return_logits: whether to keep and return the logits.
        response_masks: optional per-sample masks over the response tokens; positions
            with a zero entry are removed from the returned mask.

    Returns:
        A tuple ``(logprobs, logits, values, masks)`` concatenated over all chunks.
        ``logits`` is ``None`` unless ``return_logits`` is set; ``logits``, ``values``
        and ``masks`` have their last position dropped.
    """
    bs = len(queries)
    fbs = self.config.mini_batch_size
    all_logprobs = []
    all_logits = []
    all_masks = []
    all_values = []

    # loop-invariant: the architecture check does not depend on the chunk
    unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model)
    is_chatglm = getattr(unwrapped_model.config, "model_type", None) == "chatglm"

    for i in range(math.ceil(bs / fbs)):
        chunk = slice(i * fbs, (i + 1) * fbs)
        input_kwargs = {key: value[chunk] for key, value in model_inputs.items()}
        query_batch = queries[chunk]
        response_batch = responses[chunk]
        if response_masks is not None:
            response_masks_batch = response_masks[chunk]
        input_ids = input_kwargs["input_ids"]
        attention_mask = input_kwargs["attention_mask"]

        with torch.cuda.amp.autocast(dtype=self.model_args.compute_dtype):  # support bf16
            logits, _, values = model(**input_kwargs)

        if is_chatglm:
            values = torch.transpose(values, 0, 1)

        # position t of logprobs/masks describes the prediction of token t + 1
        logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
        masks = torch.zeros_like(attention_mask)
        masks[:, :-1] = attention_mask[:, 1:]

        for j in range(len(query_batch)):
            response_len = len(response_batch[j])
            start = len(query_batch[j]) - 1
            if attention_mask[j, 0] == 0:  # offset left padding
                start += attention_mask[j, :].nonzero()[0].item()
            end = start + response_len

            masks[j, :start] = 0
            masks[j, end:] = 0
            if response_masks is not None:
                # The original code rebound `response_masks_batch` to a 1-D tensor here and
                # then indexed it with `[j][start:end]`, which fails on a 0-dim tensor and
                # also applied the left-padding offset to the (unpadded) response mask.
                # Entry k of the response mask belongs to position `start + k`.
                masks[j, start:end] = masks[j, start:end] * response_masks_batch[j][:response_len]

        if return_logits:
            all_logits.append(logits)
        else:
            del logits

        all_values.append(values)
        all_logprobs.append(logprobs)
        all_masks.append(masks)

    return (
        torch.cat(all_logprobs),
        torch.cat(all_logits)[:, :-1] if return_logits else None,
        torch.cat(all_values)[:, :-1],
        torch.cat(all_masks)[:, :-1],
    )
|
357 |
+
|
358 |
+
def save_model(self, output_dir: Optional[str] = None) -> None:
    r"""
    Saves model checkpoint.

    Subclass and override to inject custom behavior.

    Nothing is written unless ``self.args.should_save`` is set. ``output_dir`` falls
    back to ``self.args.output_dir`` when omitted. If collecting the state dict raises
    ``ValueError``, an empty state dict is saved instead, the dummy weight files are
    removed, and ``self.model.save_checkpoint`` writes the full checkpoint.
    """
    if not self.args.should_save:
        return

    # Resolve the default once so that the fallback path below never receives None
    # (previously only `_save` saw the argument, and the helpers got `None`).
    if output_dir is None:
        output_dir = self.args.output_dir

    try:
        self._save(output_dir, state_dict=self.accelerator.get_state_dict(self.model))
    except ValueError:
        logger.warning(
            " stage3_gather_16bit_weights_on_model_save=false. Saving the full checkpoint instead,"
            " use zero_to_fp32.py to recover weights"
        )
        self._save(output_dir, state_dict={})
        remove_dummy_checkpoint(True, output_dir, [WEIGHTS_NAME, SAFE_WEIGHTS_NAME])
        self.model.save_checkpoint(output_dir)
|
LLM-Detector-V7-11w/src/llmtuner/train/ppo/utils.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import torch
|
3 |
+
from typing import TYPE_CHECKING, Dict, List, Literal, Optional
|
4 |
+
|
5 |
+
from llmtuner.extras.packages import is_requests_available
|
6 |
+
|
7 |
+
if TYPE_CHECKING:
|
8 |
+
from transformers import PreTrainedModel
|
9 |
+
from trl import AutoModelForCausalLMWithValueHead
|
10 |
+
|
11 |
+
if is_requests_available():
|
12 |
+
import requests
|
13 |
+
|
14 |
+
|
15 |
+
def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]:
    r"""
    Requests reward scores for the given messages from a reward model server.

    Sends a JSON POST request containing ``messages`` to ``server_url`` and reads the
    ``"scores"`` field of the JSON response.

    Returns:
        One float32 tensor per returned score, as a list (matching the annotation).

    Raises:
        requests.HTTPError: if the server answers with an error status code.
    """
    headers = {"Content-Type": "application/json"}
    payload = {"model": "model", "messages": messages}
    response = requests.post(server_url, json=payload, headers=headers)
    # surface HTTP failures directly instead of as a confusing JSON/KeyError below
    response.raise_for_status()
    scores = json.loads(response.text)["scores"]
    return [torch.tensor(score, dtype=torch.float32) for score in scores]
|
21 |
+
|
22 |
+
|
23 |
+
def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None:
    r"""
    Activates the adapter named ``target`` and loads the matching value head.

    When switching to "reward", the current value head weight and bias are first
    stored on the model as ``default_head_weight`` / ``default_head_bias``. The value
    head is then loaded from the ``{target}_head_weight`` / ``{target}_head_bias``
    buffers of the model.
    """
    if target == "reward":  # save default head temporarily
        current_head: Dict[str, torch.Tensor] = model.v_head.state_dict()
        model.default_head_weight = current_head["summary.weight"].detach().clone()
        model.default_head_bias = current_head["summary.bias"].detach().clone()

    model.pretrained_model.set_adapter(target)  # set the LoRA adapter to be active

    head_weight = model.get_buffer(f"{target}_head_weight").detach().clone()
    head_bias = model.get_buffer(f"{target}_head_bias").detach().clone()
    model.v_head.load_state_dict({"summary.weight": head_weight, "summary.bias": head_bias})
|
34 |
+
|
35 |
+
|
36 |
+
def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]:
    r"""
    Casts all float32 parameters to ``model.config.torch_dtype`` in place.

    Returns a dict mapping the name of every converted parameter to a copy of its
    original float32 data, so that the cast can be undone later.
    """
    saved_params = {}
    for name, param in model.named_parameters():
        if param.data.dtype != torch.float32:
            continue  # only full-precision parameters are touched

        saved_params[name] = param.data.detach().clone()
        param.data = param.data.to(model.config.torch_dtype)

    return saved_params
|
44 |
+
|
45 |
+
|
46 |
+
def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None:
    r"""
    Puts previously saved parameter data back onto the model.

    Args:
        model: the model whose parameters are overwritten.
        layernorm_params: mapping from parameter name to the tensor to restore.
            ``None`` (the default) or an empty dict leaves the model untouched.
    """
    if not layernorm_params:
        # the default of None used to crash on the membership test below
        return

    for name, param in model.named_parameters():
        if name in layernorm_params:
            param.data = layernorm_params[name]
|
LLM-Detector-V7-11w/src/llmtuner/train/ppo/workflow.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py
|
2 |
+
|
3 |
+
import math
|
4 |
+
from trl import PPOConfig
|
5 |
+
from torch.optim import AdamW
|
6 |
+
from typing import TYPE_CHECKING, Optional, List
|
7 |
+
from transformers import DataCollatorWithPadding
|
8 |
+
from transformers.optimization import get_scheduler
|
9 |
+
|
10 |
+
from llmtuner.data import get_dataset, preprocess_dataset
|
11 |
+
from llmtuner.extras.callbacks import FixValueHeadModelCallback
|
12 |
+
from llmtuner.extras.misc import fix_valuehead_checkpoint
|
13 |
+
from llmtuner.extras.ploting import plot_loss
|
14 |
+
from llmtuner.model import load_model_and_tokenizer
|
15 |
+
from llmtuner.train.utils import create_ref_model, create_reward_model
|
16 |
+
from llmtuner.train.ppo.trainer import CustomPPOTrainer
|
17 |
+
|
18 |
+
if TYPE_CHECKING:
|
19 |
+
from transformers import Seq2SeqTrainingArguments, TrainerCallback
|
20 |
+
from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments
|
21 |
+
|
22 |
+
|
23 |
+
def run_ppo(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    generating_args: "GeneratingArguments",
    callbacks: Optional[List["TrainerCallback"]] = None
):
    r"""
    Runs the PPO stage: loads data and models, builds the PPO trainer and trains.

    Args:
        model_args: arguments used to load the model, tokenizer and reference model.
        data_args: arguments used to load and preprocess the dataset.
        training_args: training hyper-parameters (batch size, learning rate, steps, ...).
        finetuning_args: PPO-specific settings (buffer size, epochs, reward options, ...).
        generating_args: generation settings forwarded to the trainer.
        callbacks: optional extra trainer callbacks; a ``FixValueHeadModelCallback``
            is always appended. ``None`` is treated as no extra callbacks.
    """
    dataset = get_dataset(model_args, data_args)
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True)
    dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="ppo")

    tokenizer.padding_side = "left"  # use left-padding in generation while using right-padding in training
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Create reference model and reward model
    ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True)
    reward_model = create_reward_model(model, model_args, finetuning_args)

    # Create ppo config
    backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
    ppo_config = PPOConfig(
        model_name=model_args.model_name_or_path,
        learning_rate=training_args.learning_rate,
        mini_batch_size=training_args.per_device_train_batch_size,
        batch_size=backward_batch_size * finetuning_args.ppo_buffer_size,
        gradient_accumulation_steps=training_args.gradient_accumulation_steps,
        ppo_epochs=finetuning_args.ppo_epochs,
        max_grad_norm=training_args.max_grad_norm,
        seed=training_args.seed,
        optimize_device_cache=True,
        target=finetuning_args.ppo_target,
        log_with=finetuning_args.ppo_logger,
        use_score_scaling=finetuning_args.ppo_score_norm,
        use_score_norm=finetuning_args.ppo_score_norm,
        whiten_rewards=finetuning_args.ppo_whiten_rewards,
        accelerator_kwargs={"step_scheduler_with_optimizer": False}
    )

    # Create optimizer and scheduler
    optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)
    if training_args.max_steps > 0:
        num_training_steps = training_args.max_steps
    else:
        total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
        num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)

    lr_scheduler = get_scheduler(
        training_args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=training_args.get_warmup_steps(num_training_steps),
        num_training_steps=num_training_steps
    )

    # `callbacks` defaults to None, which cannot be concatenated with a list
    trainer_callbacks = list(callbacks) if callbacks is not None else []
    trainer_callbacks.append(FixValueHeadModelCallback())

    # Initialize our Trainer
    ppo_trainer = CustomPPOTrainer(
        model_args=model_args,
        training_args=training_args,
        finetuning_args=finetuning_args,
        generating_args=generating_args,
        callbacks=trainer_callbacks,
        reward_model=reward_model,
        config=ppo_config,
        model=model,
        ref_model=ref_model,
        tokenizer=tokenizer,
        dataset=dataset,
        data_collator=data_collator,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler
    )

    # Training
    if training_args.do_train:
        ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        ppo_trainer.save_model()
        if training_args.should_save:
            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)
        ppo_trainer.save_state()  # must be called after save_model to have a folder
        if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "reward"])
|
LLM-Detector-V7-11w/src/llmtuner/train/pt/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from llmtuner.train.pt.workflow import run_pt
|
LLM-Detector-V7-11w/src/llmtuner/train/pt/workflow.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py
|
2 |
+
|
3 |
+
import math
|
4 |
+
from typing import TYPE_CHECKING, Optional, List
|
5 |
+
from transformers import DataCollatorForLanguageModeling, Trainer
|
6 |
+
|
7 |
+
from llmtuner.data import get_dataset, preprocess_dataset, split_dataset
|
8 |
+
from llmtuner.extras.ploting import plot_loss
|
9 |
+
from llmtuner.model import load_model_and_tokenizer
|
10 |
+
from llmtuner.train.utils import create_modelcard_and_push
|
11 |
+
|
12 |
+
if TYPE_CHECKING:
|
13 |
+
from transformers import Seq2SeqTrainingArguments, TrainerCallback
|
14 |
+
from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments
|
15 |
+
|
16 |
+
|
17 |
+
def run_pt(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    training_args: "Seq2SeqTrainingArguments",
    finetuning_args: "FinetuningArguments",
    callbacks: Optional[List["TrainerCallback"]] = None
):
    r"""
    Runs the pre-training stage with a causal language modeling collator.

    Loads and preprocesses the dataset, builds a ``Trainer``, then trains and/or
    evaluates according to ``training_args.do_train`` / ``training_args.do_eval``.
    Evaluation also reports perplexity, and a model card is created at the end.
    """
    raw_dataset = get_dataset(model_args, data_args)
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train)
    dataset = preprocess_dataset(raw_dataset, tokenizer, data_args, training_args, stage="pt")
    collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Initialize our Trainer
    dataset_splits = split_dataset(dataset, data_args, training_args)
    trainer = Trainer(
        model=model,
        args=training_args,
        tokenizer=tokenizer,
        data_collator=collator,
        callbacks=callbacks,
        **dataset_splits
    )

    # Training
    if training_args.do_train:
        train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
        trainer.save_model()
        train_metrics = train_result.metrics
        trainer.log_metrics("train", train_metrics)
        trainer.save_metrics("train", train_metrics)
        trainer.save_state()
        if trainer.is_world_process_zero() and finetuning_args.plot_loss:
            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate(metric_key_prefix="eval")
        try:
            metrics["perplexity"] = math.exp(metrics["eval_loss"])
        except OverflowError:
            # exp() of a very large loss does not fit into a float
            metrics["perplexity"] = float("inf")

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Create model card
    create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
|