File size: 5,205 Bytes
20270d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import html

import streamlit as st
import sparknlp

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Page configuration: use the wide layout and let Streamlit decide the
# initial sidebar state ("auto").
st.set_page_config(
    layout="wide", 
    initial_sidebar_state="auto"
)

# CSS for styling: inject a <style> element through st.markdown (raw HTML is
# enabled via unsafe_allow_html=True). It defines the classes
#   .main-title - large, bold, centered blue heading (used for the page title)
#   .section    - light-grey rounded panel with grey paragraph/list text
#                 (not referenced in the visible part of this script)
#   .scroll     - bordered, padded box that wraps text and scrolls
#                 horizontally (used for the input and predicted sentences)
st.markdown("""

    <style>

        .main-title {

            font-size: 36px;

            color: #4A90E2;

            font-weight: bold;

            text-align: center;

        }

        .section {

            background-color: #f9f9f9;

            padding: 10px;

            border-radius: 10px;

            margin-top: 10px;

        }

        .section p, .section ul {

            color: #666666;

        }

        .scroll {

            overflow-x: auto;

            border: 1px solid #e6e9ef;

            border-radius: 0.25rem;

            padding: 1rem;

            margin-bottom: 2.5rem;

            white-space: pre-wrap;

        }

    </style>

""", unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    Decorated with ``st.cache_resource`` so the started session is reused
    instead of being created again on every call.
    """
    session = sparknlp.start()
    return session

@st.cache_resource
def create_pipeline(model, task):
    """Assemble the (unfitted) two-stage T5 style-transfer pipeline.

    Args:
        model: Name of the pretrained model handed to
            ``T5Transformer.pretrained``.
        task: Task prefix string set on the transformer with ``setTask``.

    Returns:
        A ``Pipeline`` with two stages: a ``DocumentAssembler`` that reads the
        ``text`` column into ``documents``, followed by the T5 transformer
        that reads ``documents`` and writes ``transfers``.
    """
    # Stage 1: wrap the raw "text" column into document annotations.
    assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    # Stage 2: pretrained T5 model; output length is capped at 200.
    transformer = (
        T5Transformer.pretrained(model)
        .setTask(task)
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("transfers")
    )

    return Pipeline().setStages([assembler, transformer])

def fit_data(pipeline, data):
    """Fit ``pipeline`` on a one-row DataFrame and collect the generated text.

    Args:
        pipeline: Pipeline to fit and then apply to the same DataFrame.
        data: The text placed in the single ``text`` cell.

    Returns:
        The collected rows of the ``transfers.result`` selection.

    Note:
        Uses the module-level ``spark`` session, which must be assigned
        before this function is called.
    """
    frame = spark.createDataFrame([[data]]).toDF("text")
    fitted = pipeline.fit(frame)
    transformed = fitted.transform(frame)
    return transformed.select("transfers.result").collect()

# Sidebar: model picker. The chosen name is used below both as the key into
# the example/task tables and as the pretrained model to load.
model = st.sidebar.selectbox(
    label="Choose the Pretrained Model",
    options=['t5_informal_to_formal_styletransfer', 't5_formal_to_informal_styletransfer'],
    help="Select the model you want to use for style transfer.",
)

# Sidebar: "Open In Colab" badge linking to the reference notebook.
_colab_badge_html = """

    <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">

        <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>

    </a>

    """
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(_colab_badge_html, unsafe_allow_html=True)

# Sample sentences offered in the "Select an example" box, one list per model.
_informal_samples = [
    "Who gives a crap about that anyway? It's not like it matters!",
    "Hiya, how ya doing? I haven't seen ya in forever!",
    "btw - ur face looks really familiar, have we met before?",
    "I looooooooooooooove going to the movies! It's my absolute favorite thing to do!",
    "Hey, what's up? Wanna grab a bite to eat later?",
    "Nah, I'm good. Don't feel like going out tonight.",
    "Yo, that was totally awesome! Can't believe we pulled it off!",
    "Check this out, it's totally epic! You've gotta see it!",
    "I'm so stoked for the weekend, can't wait to just chill!",
    "Dude, that party was lit! Had the best time ever!",
]

_formal_samples = [
    "Please leave the room now, as your presence is no longer required.",
    "Thank you very much, sir! Your kindness is greatly appreciated.",
    "It's a pleasure to meet you, and I look forward to our collaboration.",
    "I appreciate your assistance with this matter. It was very helpful.",
    "She understood the complex instructions very quickly and efficiently.",
    "He contracted a fever after returning from his overseas trip.",
    "He investigated his accountant thoroughly before making any decisions.",
    "Kindly refrain from making any noise during the presentation.",
    "She expressed her gratitude for the opportunity to work on this project.",
    "He was extremely punctual and arrived precisely at the scheduled time.",
]

# Model name -> example inputs written in that model's *source* style.
examples = {
    "t5_informal_to_formal_styletransfer": _informal_samples,
    "t5_formal_to_informal_styletransfer": _formal_samples,
}

# Model name -> task prefix string passed to the T5 transformer.
task_descriptions = {
    "t5_informal_to_formal_styletransfer": "transfer Casual to Formal:",
    "t5_formal_to_informal_styletransfer": "transfer Formal to Casual:",
}

# Set up the page layout
title = "T5 for Informal to Formal Style Transfer"
sub_title = "Effortlessly Transform Sentences and Explore Different Writing Styles"

# Both strings are constants defined above, so they are safe to embed as HTML.
st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)

# Text selection and analysis
selected_text = st.selectbox("Select an example", examples[model])
custom_input = st.text_input("Try it with your own sentence!")

# A custom sentence takes priority over the selected example, but a blank or
# whitespace-only entry falls back to the example instead of being sent to the
# model.
text_to_analyze = custom_input if custom_input.strip() else selected_text

st.write('Text to analyze:')
# The sentence is user-controlled and is rendered as raw HTML
# (unsafe_allow_html=True), so escape it: otherwise typed markup would be
# injected into the page and text containing '<' or '&' would be garbled.
st.markdown(f'<div class="scroll">{html.escape(text_to_analyze)}</div>', unsafe_allow_html=True)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model, task_descriptions[model])
output = fit_data(pipeline, text_to_analyze)

# Display transformed sentence
st.write("Predicted Sentence:")
# output[0][0] is the list of generated strings for the single input row.
output_text = "".join(output[0][0])
# The model output is derived from user input, so it is escaped as well.
# NOTE(review): str.title() also capitalizes the letter after an apostrophe
# ("it's" -> "It'S"); kept as the existing display choice -- confirm intent.
st.markdown(f'<div class="scroll">{html.escape(output_text.title())}</div>', unsafe_allow_html=True)