File size: 3,753 Bytes
61cf18c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import os
import json
import torch
import random
# Layout of the generated project, all relative to the current directory:
#   project_root/model/            - config and weights
#   project_root/model/tokenizer/  - tokenizer assets
#   project_root/scripts/          - generated helper scripts
project_root = 'project_root'
model_dir = os.path.join(project_root, 'model')
scripts_dir = os.path.join(project_root, 'scripts')
tokenizer_dir = os.path.join(model_dir, 'tokenizer')

# makedirs also creates missing parents, so building the tokenizer directory
# brings project_root/ and project_root/model/ into existence as well.
# exist_ok=True makes re-running the script harmless.
for _new_dir in (tokenizer_dir, scripts_dir):
    os.makedirs(_new_dir, exist_ok=True)
# Step 2: Create config.json
# Hyperparameters written next to the weights as model/config.json.
# NOTE(review): the SampleModel defined in this file is a single
# Linear(100, 10) layer, so only input_size/output_size correspond to it;
# hidden_size, num_layers and dropout are not read by any code here.
config = {
    "model_type": "my_model_type",
    "input_size": 100,
    "hidden_size": 64,
    "output_size": 10,
    "num_layers": 1,
    "dropout": 0.2,
}
# An explicit encoding keeps the output independent of the platform's
# default locale encoding.
with open(os.path.join(model_dir, 'config.json'), 'w', encoding='utf-8') as f:
    json.dump(config, f)
# Step 3: Create a sample pytorch_model.bin
class SampleModel(torch.nn.Module):
    """Minimal single-layer network used to produce placeholder weights.

    The module holds one ``torch.nn.Linear`` layer stored as ``self.linear``,
    so its state dict contains ``linear.weight`` and ``linear.bias``.

    Args:
        input_size: Number of features per input row. Defaults to 100.
        output_size: Number of values produced per row. Defaults to 10.
    """

    def __init__(self, input_size: int = 100, output_size: int = 10):
        super().__init__()
        self.linear = torch.nn.Linear(input_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
# Build the model with its freshly initialised (untrained) parameters and
# persist only its state dict, not the module object itself, to
# model/pytorch_model.bin.
weights_path = os.path.join(model_dir, 'pytorch_model.bin')
model = SampleModel()
torch.save(model.state_dict(), weights_path)
# Step 4: Create vocab.txt for tokenizer
vocab = ['hello', 'world', 'my', 'model', 'tokenization', 'is', 'important']
vocab_file_path = os.path.join(tokenizer_dir, 'vocab.txt')
# One token per line, in list order; every line (including the last) ends
# with a newline.
with open(vocab_file_path, 'w', encoding='utf-8') as f:
    f.writelines(f"{token}\n" for token in vocab)

# Step 5: Create tokenizer.json
# vocab_size is derived from the list above so the two files cannot drift
# apart when the vocabulary is edited.
tokenizer_config = {
    "vocab_size": len(vocab),
    "do_lower_case": True,
    "tokenizer_type": "MyTokenizer",
}
tokenizer_json_path = os.path.join(tokenizer_dir, 'tokenizer.json')
with open(tokenizer_json_path, 'w', encoding='utf-8') as f:
    json.dump(tokenizer_config, f)
# Step 6: Create train.py
# Source text of scripts/train.py. The indentation inside the literal is
# part of the generated program: without it the emitted file is not valid
# Python. The script trains on random tensors for five full-batch steps and
# prints the loss after each one; it does not save the trained weights.
train_script = """import torch
import torch.nn as nn
import torch.optim as optim


class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)


def train():
    model = SampleModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Sample data
    inputs = torch.randn(100, 100)  # 100 samples
    targets = torch.randint(0, 10, (100,))  # 100 random labels

    # Training loop (simplified)
    for epoch in range(5):  # 5 epochs
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")


if __name__ == "__main__":
    train()
"""
with open(os.path.join(scripts_dir, 'train.py'), 'w', encoding='utf-8') as f:
    f.write(train_script)
# Step 7: Create inference.py
# Source text of scripts/inference.py. Two things matter in the literal:
#   * its indentation, which is part of the generated program;
#   * MODEL_PATH, which is resolved from the script's own location
#     (<project>/scripts/ -> <project>/model/pytorch_model.bin) so the script
#     no longer depends on being launched from the project root.
# inference() keeps its original single-argument call form; model_path is an
# optional override.
inference_script = """import os

import torch
import torch.nn as nn

# Weights live in ../model/ relative to this file, independent of the
# current working directory.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    os.pardir,
    'model',
    'pytorch_model.bin',
)


class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)


def inference(input_data, model_path=MODEL_PATH):
    model = SampleModel()
    model.load_state_dict(torch.load(model_path))
    model.eval()
    with torch.no_grad():
        output = model(input_data)
    return output


if __name__ == "__main__":
    # Sample inference
    input_data = torch.randn(1, 100)  # Single sample
    output = inference(input_data)
    print(output)
"""
with open(os.path.join(scripts_dir, 'inference.py'), 'w', encoding='utf-8') as f:
    f.write(inference_script)
# Step 8: Create utils.py
# Source text of scripts/utils.py. The generated module defines SampleModel
# itself: load_model() instantiates it, and without a definition in the same
# file the call fails with NameError. The indentation inside the literal is
# part of the generated program.
utils_script = """import torch
import torch.nn as nn


class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)


def load_model(model_path):
    model = SampleModel()
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model


def preprocess_input(input_data):
    # Add input preprocessing logic here
    return input_data
"""
with open(os.path.join(scripts_dir, 'utils.py'), 'w', encoding='utf-8') as f:
    f.write(utils_script)

print("Project structure created successfully!")
|