File size: 3,753 Bytes
61cf18c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import json
import torch
import random

# Step 1: Lay out the project tree: project_root/{model/tokenizer, scripts}.
project_root = 'project_root'
model_dir = os.path.join(project_root, 'model')
tokenizer_dir = os.path.join(model_dir, 'tokenizer')
scripts_dir = os.path.join(project_root, 'scripts')

# makedirs(..., exist_ok=True) builds each chain idempotently;
# creating tokenizer_dir implicitly creates model_dir as its parent.
for target in (tokenizer_dir, scripts_dir):
    os.makedirs(target, exist_ok=True)

# Step 2: Write the model hyperparameters to model/config.json.
# Key order matches the original literal so the serialized JSON is identical.
config = dict(
    model_type="my_model_type",
    input_size=100,
    hidden_size=64,
    output_size=10,
    num_layers=1,
    dropout=0.2,
)

config_path = os.path.join(model_dir, 'config.json')
with open(config_path, 'w') as fh:
    json.dump(config, fh)

# Step 3: Create a sample pytorch_model.bin
class SampleModel(torch.nn.Module):
    """Minimal classifier: a single affine map from 100 features to 10 logits."""

    def __init__(self):
        super().__init__()
        # One fully-connected layer; no activation, bias enabled by default.
        self.linear = torch.nn.Linear(100, 10)

    def forward(self, x):
        # Pure projection — output shape is (..., 10) for input shape (..., 100).
        return self.linear(x)

# Step 3: Persist freshly-initialized (random) weights as pytorch_model.bin.
weights_path = os.path.join(model_dir, 'pytorch_model.bin')
model = SampleModel()
torch.save(model.state_dict(), weights_path)

# Step 4: Write the tokenizer vocabulary, one token per line.
vocab = ['hello', 'world', 'my', 'model', 'tokenization', 'is', 'important']
vocab_file_path = os.path.join(tokenizer_dir, 'vocab.txt')
# Single write of "token\n" per entry — byte-identical to a per-token loop.
with open(vocab_file_path, 'w') as vf:
    vf.write("\n".join(vocab) + "\n")

# Step 5: Record tokenizer metadata next to the vocab file.
# Key order is preserved from the original so the JSON bytes match.
tokenizer_config = {
    "vocab_size": len(vocab),
    "do_lower_case": True,
    "tokenizer_type": "MyTokenizer",
}
tokenizer_json_path = os.path.join(tokenizer_dir, 'tokenizer.json')
with open(tokenizer_json_path, 'w') as fh:
    json.dump(tokenizer_config, fh)

# Step 6: Emit a standalone training script into scripts/train.py.
# The emitted script redefines SampleModel so it runs without importing
# this scaffolding file. The literal below is written out byte-for-byte.
train_script = """import torch
import torch.nn as nn
import torch.optim as optim

class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)

def train():
    model = SampleModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Sample data
    inputs = torch.randn(100, 100)  # 100 samples
    targets = torch.randint(0, 10, (100,))  # 100 random labels
    
    # Training loop (simplified)
    for epoch in range(5):  # 5 epochs
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

if __name__ == "__main__":
    train()
"""

train_path = os.path.join(scripts_dir, 'train.py')
with open(train_path, 'w') as fh:
    fh.write(train_script)

# Step 7: Emit a standalone inference script into scripts/inference.py.
# Note: the emitted script loads 'model/pytorch_model.bin' via a relative
# path, so it must be run from inside project_root. Literal written verbatim.
inference_script = """import torch
import torch.nn as nn

class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)

def inference(input_data):
    model = SampleModel()
    model.load_state_dict(torch.load('model/pytorch_model.bin'))
    model.eval()
    with torch.no_grad():
        output = model(input_data)
    return output

if __name__ == "__main__":
    # Sample inference
    input_data = torch.randn(1, 100)  # Single sample
    output = inference(input_data)
    print(output)
"""

inference_path = os.path.join(scripts_dir, 'inference.py')
with open(inference_path, 'w') as fh:
    fh.write(inference_script)

# Step 8: Emit scripts/utils.py.
# Bug fix: the previously generated utils.py referenced SampleModel without
# defining or importing it (and imported torch only inside load_model), so
# calling load_model() raised NameError at runtime. The emitted script is now
# self-contained: module-level imports plus a SampleModel definition that
# matches the one written into train.py and inference.py.
utils_script = """import torch
import torch.nn as nn

class SampleModel(nn.Module):
    def __init__(self):
        super(SampleModel, self).__init__()
        self.linear = nn.Linear(100, 10)

    def forward(self, x):
        return self.linear(x)

def load_model(model_path):
    model = SampleModel()
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

def preprocess_input(input_data):
    # Add input preprocessing logic here
    return input_data
"""

with open(os.path.join(scripts_dir, 'utils.py'), 'w') as f:
    f.write(utils_script)

print("Project structure created successfully!")