File size: 3,958 Bytes
156ae10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Install modules
# NOTE: `%pip` is IPython/Jupyter magic syntax, not plain Python; these two
# lines only work when the file is executed as a notebook cell.
%pip install --upgrade pip
%pip install torch torchdata transformers datasets loralib peft pandas numpy

# Import modules
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import pandas as pd
import torch

# Configuration values

# Base model to use
model_name = "google/flan-t5-base"

# CSV file to use
training_file = "tarot_readings.csv"

# Number of iterations to train
num_epochs = 3

# Number of rows to use for training
num_rows = 500

# cpu or cuda
device = "cpu"

# Convert CSV file to tokens for training
def create_tarot_dataset(csv_file, tokenizer, num_rows=None):
    """Read tarot readings from a CSV file and tokenize them for seq2seq training.

    The CSV must provide the columns ``Card 1``, ``Card 2``, ``Card 3`` and
    ``Reading``. Header names and cell values are stripped of surrounding
    whitespace, so files written as ``Card 1, Card 2, ...`` (blank after each
    comma) and files written without the blank are both accepted.

    Args:
        csv_file: Path (or file-like object) passed to ``pandas.read_csv``.
        tokenizer: Callable tokenizer; it is invoked as ``tokenizer(text, ...)``
            and must return a mapping with ``input_ids`` and ``attention_mask``
            tensors of shape ``(1, max_length)``.
        num_rows: Optional cap on how many leading rows to use. ``None`` means
            all rows; ``0`` yields an empty dataset.

    Returns:
        A list with one dict per row holding the 1-D tensors ``input_ids``,
        ``attention_mask``, ``target_ids`` and ``target_attention_mask``.
    """
    data = pd.read_csv(csv_file)

    # Normalise header names so " Card 2" and "Card 2" resolve to the same key.
    data = data.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)

    # Compare against None so that num_rows=0 means "no rows", not "all rows".
    if num_rows is not None:
        data = data[:num_rows]

    def clean(value):
        # Drop the blank that follows each comma in the CSV so the training
        # prompt is spelled the same way as the prompt used at inference time.
        return str(value).strip()

    def tokenize(row):
        prompt = "Give me a one paragraph tarot reading if I pull the cards {}, {} and {}.".format(
            clean(row['Card 1']), clean(row['Card 2']), clean(row['Card 3']))
        reading = clean(row['Reading'])

        inputs = tokenizer(prompt, add_special_tokens=True, padding='max_length', max_length=128, truncation=True, return_tensors='pt')
        target = tokenizer(reading, add_special_tokens=True, padding='max_length', max_length=128, truncation=True, return_tensors='pt')

        # return_tensors='pt' adds a leading batch axis of size 1; remove it so
        # a DataLoader can stack the samples into batches itself.
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'target_ids': target['input_ids'].squeeze(0),
            'target_attention_mask': target['attention_mask'].squeeze(0),
        }

    # Iterate over plain row dicts instead of DataFrame.apply: apply() probes
    # the function with a dummy row on an empty frame and may hand back a
    # DataFrame, which has no tolist().
    dataset = [tokenize(row) for row in data.to_dict('records')]

    return dataset

# Train the model with dataset
def fine_tune_model(model, optimizer, batch, device):
    """Run a single optimisation step on one batch and return its loss.

    Args:
        model: Seq2seq model called with ``input_ids``, ``attention_mask``,
            ``labels`` and ``decoder_attention_mask``; its output must expose
            a ``loss`` tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        batch: Mapping with the tensors ``input_ids``, ``attention_mask``,
            ``target_ids`` and ``target_attention_mask``.
        device: Device the batch tensors are moved to before the forward pass.
            The model itself is not moved here.

    Returns:
        The batch loss as a Python float.
    """
    model.train()

    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    decoder_attention_mask = batch['target_attention_mask'].to(device)

    # Targets are padded to a fixed length. Replace the padded positions with
    # -100 (the label value the Hugging Face seq2seq loss ignores) so the model
    # is not trained to emit padding. masked_fill returns a new tensor, so the
    # caller's batch is left untouched.
    labels = batch['target_ids'].to(device).masked_fill(decoder_attention_mask == 0, -100)

    outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels, decoder_attention_mask=decoder_attention_mask)
    loss = outputs.loss

    # Clear gradients from the previous step before accumulating new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

# Run inference using the provided model and 3 tarot cards
def tacot_reading(model, tokenizer, card1, card2, card3):
    """Generate, print and return a tarot reading for three cards.

    Args:
        model: Model exposing ``generate(input_ids, max_new_tokens=...)``.
        tokenizer: Tokenizer callable as ``tokenizer(text, return_tensors="pt")``
            and providing ``decode(ids, skip_special_tokens=True)``.
        card1: Name of the first card.
        card2: Name of the second card.
        card3: Name of the third card.

    Returns:
        The decoded completion text.
    """
    prompt = "Give me a one paragraph tarot reading if I pull the cards {}, {} and {}.".format(card1, card2, card3)

    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]

    # Feed the prompt on whatever device the model's weights live on, so the
    # call also works after the model has been moved off the CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_ids = input_ids.to(first_param.device)

    # Training leaves the model in train mode (dropout active). Switch to eval
    # mode for generation and restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            generated = model.generate(input_ids, max_new_tokens=1000)
    finally:
        model.train(was_training)

    completion = tokenizer.decode(generated[0], skip_special_tokens=True)

    print("Prompt: {}".format(prompt))
    print("Response: {}".format(completion))
    print()

    return completion

# Script body: load the base model, show sample readings, fine-tune on the CSV
# data, then show the same sample readings again for comparison.
print("* Loading model [{}]...".format(model_name))
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# The same three spreads are used before and after training so the outputs
# can be compared directly.
sample_spreads = [
    ("The moon", "Two of Swords", "Three of Wands"),
    ("The hermit", "Ace of Pentacles", "Judgement"),
    ("Seven of Cups", "The chariot", "King of Swords"),
]

print("* Running 3 inferences (pre-training)...")
for spread in sample_spreads:
    tacot_reading(model, tokenizer, *spread)

print("* Creating dataset from [{}]...".format(training_file))
dataset = create_tarot_dataset(training_file, tokenizer, num_rows)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)

print("* Training model for {} epochs..".format(num_epochs))
# Batches are moved to `device` during training, so the model has to live there
# too; move it before the optimizer is built from its parameters.
model.to(device)
optimizer = Adam(model.parameters(), lr=1e-4)
for epoch in range(num_epochs):
    loss = 0
    for batch in data_loader:
        loss += fine_tune_model(model, optimizer, batch, device)
    print("Epoch {} average loss: {}".format((epoch+1), (loss / len(data_loader))))

# Tokenizer output is created on the CPU, so bring the model back there before
# running the comparison inferences.
model.to("cpu")

print("* Running 3 inferences (post-training)...")
for spread in sample_spreads:
    tacot_reading(model, tokenizer, *spread)