# RabbitRedux — setup_project.py
# Scaffolds a toy PyTorch project: directory layout, config.json, an
# untrained checkpoint, tokenizer files, and train/inference/utils scripts.
import os
import json
import torch
import random
# Lay out the on-disk project skeleton:
#   project_root/
#     model/
#       tokenizer/
#     scripts/
project_root = 'project_root'
model_dir = os.path.join(project_root, 'model')
tokenizer_dir = os.path.join(model_dir, 'tokenizer')
scripts_dir = os.path.join(project_root, 'scripts')

# makedirs creates all intermediate directories, so project_root and
# model_dir come into existence as parents of these two leaf paths.
for leaf in (tokenizer_dir, scripts_dir):
    os.makedirs(leaf, exist_ok=True)

# Step 2: write the model's hyper-parameter configuration.
# Sizes here are mirrored by the nn.Linear(100, 10) layers generated below.
config = {
    "model_type": "my_model_type",
    "input_size": 100,
    "hidden_size": 64,
    "output_size": 10,
    "num_layers": 1,
    "dropout": 0.2,
}
with open(os.path.join(model_dir, 'config.json'), 'w') as f:
    json.dump(config, f)
# Step 3: Create a sample pytorch_model.bin
class SampleModel(torch.nn.Module):
    """Minimal linear model mapping 100-dim inputs to 10 output logits."""

    def __init__(self):
        super().__init__()
        # Single fully-connected layer; dimensions match config.json
        # (input_size=100, output_size=10).
        self.linear = torch.nn.Linear(100, 10)

    def forward(self, x):
        """Apply the linear projection to a (batch, 100) input tensor."""
        return self.linear(x)
# Step 3: materialise (untrained) weights as pytorch_model.bin so the
# directory resembles a real checkpoint layout.
model = SampleModel()
torch.save(model.state_dict(), os.path.join(model_dir, 'pytorch_model.bin'))

# Step 4: vocab.txt — one token per line, the conventional layout.
vocab = ['hello', 'world', 'my', 'model', 'tokenization', 'is', 'important']
vocab_file_path = os.path.join(tokenizer_dir, 'vocab.txt')
with open(vocab_file_path, 'w') as f:
    f.writelines(f"{token}\n" for token in vocab)

# Step 5: tokenizer metadata stored alongside the vocab file.
tokenizer_config = {
    "vocab_size": len(vocab),
    "do_lower_case": True,
    "tokenizer_type": "MyTokenizer",
}
with open(os.path.join(tokenizer_dir, 'tokenizer.json'), 'w') as f:
    json.dump(tokenizer_config, f)
# Step 6: emit scripts/train.py — a self-contained toy training loop.
# The script re-declares SampleModel so it runs standalone, trains on
# random data for 5 epochs, and prints the loss per epoch.
train_script = """import torch
import torch.nn as nn
import torch.optim as optim
class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)
    def forward(self, x):
        return self.linear(x)
def train():
    model = SampleModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # Sample data
    inputs = torch.randn(100, 100)  # 100 samples
    targets = torch.randint(0, 10, (100,))  # 100 random labels
    # Training loop (simplified)
    for epoch in range(5):  # 5 epochs
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
if __name__ == "__main__":
    train()
"""
train_path = os.path.join(scripts_dir, 'train.py')
with open(train_path, 'w') as f:
    f.write(train_script)
# Step 7: emit scripts/inference.py — loads the saved checkpoint and runs
# a forward pass under torch.no_grad().
# NOTE(review): the generated script loads 'model/pytorch_model.bin' via a
# relative path, which presumably assumes it is run from project_root —
# confirm the intended working directory.
inference_script = """import torch
import torch.nn as nn
class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)
    def forward(self, x):
        return self.linear(x)
def inference(input_data):
    model = SampleModel()
    model.load_state_dict(torch.load('model/pytorch_model.bin'))
    model.eval()
    with torch.no_grad():
        output = model(input_data)
    return output
if __name__ == "__main__":
    # Sample inference
    input_data = torch.randn(1, 100)  # Single sample
    output = inference(input_data)
    print(output)
"""
inference_path = os.path.join(scripts_dir, 'inference.py')
with open(inference_path, 'w') as f:
    f.write(inference_script)
# Step 8: emit scripts/utils.py with shared helper functions.
# FIX: the original generated load_model referenced SampleModel (and torch
# outside function scope) without defining or importing either, so calling
# it raised NameError. The generated script now declares the model class
# itself and keeps its imports at the top of the file.
utils_script = """import torch
import torch.nn as nn
class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)
    def forward(self, x):
        return self.linear(x)
def load_model(model_path):
    # Load saved weights into a SampleModel and switch to eval mode.
    model = SampleModel()
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model
def preprocess_input(input_data):
    # Add input preprocessing logic here
    return input_data
"""
with open(os.path.join(scripts_dir, 'utils.py'), 'w') as f:
    f.write(utils_script)
print("Project structure created successfully!")