myezrag

Running

App Files Files Community

ginipick committed on Oct 26, 2024

Commit

cc10093

verified ·

1 Parent(s): 5325327

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -735

app.py CHANGED Viewed

@@ -1,736 +1,2 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
 import os
-import pandas as pd
-from typing import List, Dict, Tuple
-import json
-import io
-import traceback
-import csv
-from openai import OpenAI
-from functools import lru_cache
-from concurrent.futures import ThreadPoolExecutor
-import math
-# CSS 설정
-css = """
-footer {
-    visibility: hidden;
-}
-#chatbot-container, #chatbot-data-upload {
-    height: 700px;
-    overflow-y: scroll;
-}
-#chatbot-container .message, #chatbot-data-upload .message {
-    font-size: 14px;
-}
-/* 입력창 배경색 및 글자색 변경 */
-textarea, input[type="text"] {
-    background-color: #ffffff;
-    color: #000000;
-}
-/* 파일 업로드 영역 높이 조절 */
-#parquet-upload-area {
-    max-height: 150px;
-    overflow-y: auto;
-}
-/* 초기 설명 글씨 크기 조절 */
-#initial-description {
-    font-size: 14px;
-}
-/* API Key 입력 섹션 스타일 */
-.api-key-section {
-    margin: 10px 0;
-    padding: 10px;
-    border: 1px solid #ddd;
-    border-radius: 5px;
-}
-.api-key-status {
-    margin-top: 5px;
-    font-weight: bold;
-}
-"""
-# 추론 API 클라이언트 설정
-hf_client = InferenceClient(
-    "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
-)
-def load_code(filename: str) -> str:
-    try:
-        with open(filename, 'r', encoding='utf-8') as file:
-            return file.read()
-    except FileNotFoundError:
-        return f"{filename} 파일을 찾을 수 없습니다."
-    except Exception as e:
-        return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}"
-def load_parquet(filename: str) -> str:
-    try:
-        df = pd.read_parquet(filename, engine='pyarrow')
-        return df.head(10).to_markdown(index=False)
-    except FileNotFoundError:
-        return f"{filename} 파일을 찾을 수 없습니다."
-    except Exception as e:
-        return f"파일을 읽는 중 오류가 발생했습니다: {str(e)}"
-def clean_response(text: str) -> str:
-    """응답 텍스트 정제 함수"""
-    sentences = [s.strip() for s in text.split('.') if s.strip()]
-    unique_sentences = []
-    seen = set()
-    for sentence in sentences:
-        normalized = ' '.join(sentence.lower().split())
-        if normalized not in seen:
-            seen.add(normalized)
-            unique_sentences.append(sentence)
-    cleaned_text = '. '.join(unique_sentences)
-    if cleaned_text and not cleaned_text.endswith('.'):
-        cleaned_text += '.'
-    return cleaned_text
-def remove_duplicates(text: str) -> str:
-    """중복 문장 제거 함수"""
-    sentences = text.split('.')
-    unique_sentences = []
-    seen = set()
-    for sentence in sentences:
-        sentence = sentence.strip()
-        if sentence and sentence not in seen:
-            seen.add(sentence)
-            unique_sentences.append(sentence)
-    return '. '.join(unique_sentences)
-def upload_csv(file_path: str) -> Tuple[str, str]:
-    try:
-        df = pd.read_csv(file_path, sep=',')
-        required_columns = {'id', 'text', 'label', 'metadata'}
-        available_columns = set(df.columns)
-        missing_columns = required_columns - available_columns
-        if missing_columns:
-            return f"CSV 파일에 다음 필수 컬럼이 누락되었습니다: {', '.join(missing_columns)}", ""
-        df.drop_duplicates(inplace=True)
-        df.fillna('', inplace=True)
-        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
-        parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
-        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
-        return f"{parquet_filename} 파일이 성공적으로 업로드되고 변환되었습니다.", parquet_filename
-    except Exception as e:
-        return f"CSV 파일 업로드 및 변환 중 오류가 발생했습니다: {str(e)}", ""
-def upload_parquet(file_path: str) -> Tuple[str, str, str]:
-    try:
-        df = pd.read_parquet(file_path, engine='pyarrow')
-        data_info = {
-            "총 레코드 수": len(df),
-            "컬럼 목록": list(df.columns),
-            "데이터 타입": df.dtypes.to_dict(),
-            "결측치 정보": df.isnull().sum().to_dict()
-        }
-        summary = []
-        summary.append(f"### 데이터셋 기본 정보:")
-        summary.append(f"- 총 레코드 수: {data_info['총 레코드 수']}")
-        summary.append(f"- 컬럼 목록: {', '.join(data_info['컬럼 목록'])}")
-        summary.append("\n### 컬럼별 정보:")
-        for col in df.columns:
-            if df[col].dtype in ['int64', 'float64']:
-                stats = df[col].describe()
-                summary.append(f"\n{col} (수치형):")
-                summary.append(f"- 평균: {stats['mean']:.2f}")
-                summary.append(f"- 최소: {stats['min']}")
-                summary.append(f"- 최대: {stats['max']}")
-            elif df[col].dtype == 'object' or df[col].dtype == 'string':
-                unique_count = df[col].nunique()
-                summary.append(f"\n{col} (텍스트):")
-                summary.append(f"- 고유값 수: {unique_count}")
-                if unique_count < 10:
-                    value_counts = df[col].value_counts().head(5)
-                    summary.append("- 상위 5개 값:")
-                    for val, count in value_counts.items():
-                        summary.append(f"  • {val}: {count}개")
-        preview = df.head(10).to_markdown(index=False)
-        summary.append("\n### 데이터 미리보기:")
-        summary.append(preview)
-        parquet_content = "\n".join(summary)
-        parquet_json = df.to_json(orient='records', force_ascii=False)
-        return "Parquet 파일이 성공적으로 업로드되었습니다.", parquet_content, parquet_json
-    except Exception as e:
-        return f"Parquet 파일 업로드 중 오류가 발생했습니다: {str(e)}", "", ""
-def text_to_parquet(text: str) -> Tuple[str, str, str]:
-    try:
-        lines = [line.strip() for line in text.split('\n') if line.strip()]
-        data = []
-        for line in lines:
-            try:
-                import re
-                pattern = r'(\d+),([^,]+),([^,]+),(.+)'
-                match = re.match(pattern, line)
-                if match:
-                    id_val, text_val, label_val, metadata_val = match.groups()
-                    text_val = text_val.strip().strip('"')
-                    label_val = label_val.strip().strip('"')
-                    metadata_val = metadata_val.strip().strip('"')
-                    data.append({
-                        'id': int(id_val),
-                        'text': text_val,
-                        'label': label_val,
-                        'metadata': metadata_val
-                    })
-            except Exception as e:
-                print(f"라인 파싱 오류: {line}\n{str(e)}")
-                continue
-        if not data:
-            return "변환할 데이터가 없습니다.", "", ""
-        df = pd.DataFrame(data)
-        df = df.astype({
-            'id': 'int32',
-            'text': 'string',
-            'label': 'string',
-            'metadata': 'string'
-        })
-        parquet_filename = 'text_to_parquet.parquet'
-        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
-        preview = df.to_markdown(index=False)
-        return (
-            f"{parquet_filename} 파일이 성공적으로 변환되었습니다. 총 {len(df)}개의 레코드가 처리되었습니다.",
-            preview,
-            parquet_filename
-        )
-    except Exception as e:
-        error_message = f"텍스트 변환 중 오류가 발생했습니다: {str(e)}"
-        print(f"{error_message}\n{traceback.format_exc()}")
-        return error_message, "", ""
-def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None, api_key: str = None) -> str:
-    if not api_key:
-        yield "⚠️ API Key가 설정되지 않았습니다. 서비스 이용을 위해 API Key를 입력해주세요."
-        return
-    # OpenAI 클라이언트 초기화
-    client = OpenAI(api_key=api_key)
-    system_prefix = """반드시 한글로 답변할 것. 너는 업로드된 데이터를 기반으로 질문에 답변하는 역할을 한다.
-주요 지침:
-1. 질문과 직접 관련된 내용만 간단명료하게 답변할 것
-2. 이전 답변과 중복되는 내용은 제외할 것
-3. 불필요한 예시나 부연 설명은 하지 말 것
-4. 동일한 내용을 다른 표현으로 반복하지 말 것
-5. 핵심 정보만 전달할 것
-"""
-    if parquet_data:
-        try:
-            df = pd.read_json(io.StringIO(parquet_data))
-            data_summary = df.describe(include='all').to_string()
-            system_prefix += f"\n\n데이터 요약:\n{data_summary}"
-        except Exception as e:
-            print(f"데이터 로드 오류: {str(e)}")
-    messages = [{"role": "system", "content": system_prefix}]
-    recent_history = history[-3:] if history else []
-    for chat in recent_history:
-        messages.append({"role": chat["role"], "content": chat["content"]})
-    messages.append({"role": "user", "content": message})
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=messages,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            stream=True
-        )
-        full_response = ""
-        for chunk in response:
-            if chunk.choices[0].delta.content:
-                full_response += chunk.choices[0].delta.content
-                yield clean_response(full_response)
-    except Exception as e:
-        error_message = f"응답 생성 중 오류 발생: {str(e)}"
-        print(f"{error_message}\n{traceback.format_exc()}")
-        yield error_message
-def preprocess_text_with_llm(input_text: str, api_key: str = None) -> str:
-    if not api_key:
-        return "⚠️ API Key가 설정되지 않았습니다. 서비스 이용을 위해 API Key를 입력해주세요."
-    # OpenAI 클라이언트 초기화
-    client = OpenAI(api_key=api_key)
-    system_prompt = """반드시 한글(한국어)로 답변하시오. 당신은 데이터 전처리 전문가입니다. 입력된 텍스트를 CSV 데이터셋 형식으로 변환하세요.
-규칙:
-1. 출력 형식: id,text,label,metadata
-2. id: 1부터 시작하는 순차적 번호
-3. text: 의미 있는 단위로 분리된 텍스트
-4. label: 텍스트의 주제나 카테고리를 아래 기준으로 정확하게 한 개만 선택
-   - Historical_Figure (역사적 인물)
-   - Military_History (군사 역사)
-   - Technology (기술)
-   - Politics (정치)
-   - Culture (문화)
-5. metadata: 날짜, 출처 등 추가 정보"""
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4-0125-preview",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": input_text}
-            ],
-            max_tokens=4000,
-            temperature=0.1,
-            stream=True
-        )
-        full_response = ""
-        for chunk in response:
-            if chunk.choices[0].delta.content:
-                full_response += chunk.choices[0].delta.content
-        processed_text = clean_response(full_response)
-        try:
-            from io import StringIO
-            import csv
-            csv.reader(StringIO(processed_text))
-            return processed_text
-        except csv.Error:
-            return "LLM이 올바른 CSV 형식을 생성하지 못했습니다. 다시 시도해주세요."
-    except Exception as e:
-        error_message = f"전처리 중 오류가 발생했습니다: {str(e)}"
-        print(error_message)
-        return error_message
-# Gradio Blocks 인터페이스 설정
-with gr.Blocks(css=css) as demo:
-    api_key_state = gr.State("")  # API 키를 저장할 State 추가
-    gr.Markdown("# MyEzRAG: LLM이 나만의 데이터로 학습한 콘텐츠 생성/답변", elem_id="initial-description")
-    # API 키 입력 섹션 추가
-    with gr.Row(elem_classes="api-key-section"):
-        with gr.Column(scale=3):
-            api_key_input = gr.Textbox(
-                label="OpenAI API Key",
-                placeholder="sk-...",
-                type="password",
-                show_label=True
-            )
-        with gr.Column(scale=1):
-            api_key_button = gr.Button("API Key 설정", variant="primary")
-    # API 키 상태 표시
-    api_key_status = gr.Markdown("⚠️ API Key가 설정되지 않았습니다. 서비스 이용을 위해 API Key를 입력해주세요.", elem_classes="api-key-status")
-    # API 키 설정 함수
-    def set_api_key(api_key: str):
-        if not api_key.strip():
-            return "⚠️ API Key가 설정되지 않았습니다. 서비스 이용을 위해 API Key를 입력해주세요.", ""
-        if not api_key.startswith("sk-"):
-            return "❌ 올바르지 않은 API Key 형식입니다. 다시 확인해주세요.", ""
-        return "✅ API Key가 성공적으로 설정되었습니다.", api_key
-    # API 키 설정 이벤트 연결
-    api_key_button.click(
-        set_api_key,
-        inputs=[api_key_input],
-        outputs=[api_key_status, api_key_state]
-    )
-    gr.Markdown(
-        "### '사용 방법' 탭을 통해 자세한 이용 방법을 참고하세요.\n"
-        "### Tip) '예제'를 통해 다양한 활용 방법을 체험하고 응용해 보세요, 데이터셋 업로드시 미리보기는 10건만 출력",
-        elem_id="initial-description"
-    )
-    # 첫 번째 탭: My 데이터셋+LLM
-    with gr.Tab("My 데이터셋+LLM"):
-        gr.Markdown("### LLM과 대화하기")
-        chatbot_data_upload = gr.Chatbot(label="챗봇", type="messages", elem_id="chatbot-data-upload")
-        msg_data_upload = gr.Textbox(label="메시지 입력", placeholder="여기에 메시지를 입력하세요...")
-        send_data_upload = gr.Button("전송")
-        with gr.Accordion("시스템 프롬프트 및 옵션 설정", open=False):
-            system_message = gr.Textbox(label="System Message", value="너는 AI 조언자 역할이다.")
-            max_tokens = gr.Slider(minimum=1, maximum=8000, value=1000, label="Max Tokens")
-            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
-            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
-        parquet_data_state = gr.State()
-        def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str, api_key: str):
-            if not api_key:
-                history = history or []
-                history.append({"role": "assistant", "content": "⚠️ API Key가 설정되지 않았습니다. 서비스 이용을 위해 API Key를 입력해주세요."})
-                yield history, ""
-                return
-            history = history or []
-            recent_questions = [chat['content'].strip().lower() for chat in history[-3:] if chat['role'] == 'user']
-            if message.strip().lower() in recent_questions:
-                yield history + [{"role": "assistant", "content": "동일한 질문이 최근에 있었습니다. 다른 질문을 해주세요."}], ""
-                return
-            try:
-                history.append({"role": "user", "content": message})
-                response_gen = respond(
-                    message,
-                    history,
-                    system_message,
-                    max_tokens,
-                    temperature=0.3,
-                    top_p=top_p,
-                    parquet_data=parquet_data,
-                    api_key=api_key
-                )
-                partial_response = ""
-                for partial in response_gen:
-                    partial_response = partial
-                    display_history = history + [{"role": "assistant", "content": partial_response}]
-                    yield display_history, ""
-                history.append({"role": "assistant", "content": partial_response})
-            except Exception as e:
-                response = f"오류 발생: {str(e)}"
-                history.append({"role": "assistant", "content": response})
-                yield history, ""
-        send_data_upload.click(
-            handle_message_data_upload,
-            inputs=[
-                msg_data_upload,
-                chatbot_data_upload,
-                system_message,
-                max_tokens,
-                temperature,
-                top_p,
-                parquet_data_state,
-                api_key_state,
-            ],
-            outputs=[chatbot_data_upload, msg_data_upload],
-            queue=True
-        )
-# 예제 추가
-        with gr.Accordion("예제", open=False):
-            gr.Examples(
-                examples=[
-                    ["업로드된 데이터셋에 대해 요약 설명하라."],
-                    ["업로드된 데이터셋 파일을 학습 데이터로 활용하여, 본 서비스를 SEO 최적화하여 블로그 포스트(개요, 배경 및 필요성, 기존 유사 제품/서비스와 비교하여 특장점, 활용처, 가치, 기대효과, 결론을 포함)로 4000 토큰 이상 작성하라"],
-                    ["업로드된 데이터셋 파일을 학습 데이터로 활용하여, 사용 방법과 차별점, 특징, 강점을 중심으로 4000 토큰 이상 유튜브 영상 스크립트 형태로 작성하라"],
-                    ["업로드된 데이터셋 파일을 학습 데이터로 활용하여, 제품 상세 페이지 형식의 내용을 4000 토큰 이상 자세히 설명하라"],
-                    ["업로드된 데이터셋 파일을 학습 데이터로 활용하여, FAQ 20건을 상세하게 작성하라. 4000토큰 이상 사용하라."],
-                    ["업로드된 데이터셋 파일을 학습 데이터로 활용하여, 특허 출원에 활용할 기술 및 비즈니스 모델 측면을 포함하여 특허 출원서 구성에 맞게 혁신적인 창의 발명 내용을 중심으로 4000 토큰 이상 작성하라."],
-                ],
-                inputs=msg_data_upload,
-                label="예제 선택",
-            )
-        # Parquet 파일 업로드
-        gr.Markdown("### Parquet 파일 업로드")
-        with gr.Row():
-            with gr.Column():
-                parquet_upload = gr.File(
-                    label="Parquet 파일 업로드", type="filepath", elem_id="parquet-upload-area"
-                )
-                parquet_upload_button = gr.Button("업로드")
-                parquet_upload_status = gr.Textbox(label="업로드 상태", interactive=False)
-                parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
-                def handle_parquet_upload(file_path: str):
-                    message, parquet_content, parquet_json = upload_parquet(file_path)
-                    if parquet_json:
-                        return message, parquet_content, parquet_json
-                    else:
-                        return message, "", ""
-                parquet_upload_button.click(
-                    handle_parquet_upload,
-                    inputs=parquet_upload,
-                    outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
-                )
-    # 두 번째 탭: CSV to My 데이터셋
-    with gr.Tab("CSV to My 데이터셋"):
-        gr.Markdown("### CSV 파일 업로드 및 Parquet 변환")
-        with gr.Row():
-            with gr.Column():
-                csv_file = gr.File(label="CSV 파일 업로드", type="filepath")
-                upload_button = gr.Button("업로드 및 변환")
-                upload_status = gr.Textbox(label="업로드 상태", interactive=False)
-                parquet_preview = gr.Markdown(label="Parquet 파일 미리보기")
-                download_button = gr.File(label="Parquet 파일 다운로드", interactive=False)
-                def handle_csv_upload(file_path: str):
-                    message, parquet_filename = upload_csv(file_path)
-                    if parquet_filename:
-                        parquet_content = load_parquet(parquet_filename)
-                        return message, parquet_content, parquet_filename
-                    else:
-                        return message, "", None
-                upload_button.click(
-                    handle_csv_upload,
-                    inputs=csv_file,
-                    outputs=[upload_status, parquet_preview, download_button]
-                )
-    # 세 번째 탭: Text to My 데이터셋
-    with gr.Tab("Text to My 데이터셋"):
-        gr.Markdown("### 텍스트를 입력하면 CSV로 변환 후 Parquet으로 자동 전환됩니다.")
-        with gr.Row():
-            with gr.Column():
-                text_input = gr.Textbox(
-                    label="텍스트 입력 (각 행은 `id,text,label,metadata` 형식으로 입력)",
-                    lines=10,
-                    placeholder='예: 1,"이순신","장군","거북선"\n2,"원균","장군","모함"\n3,"선조","왕","시기"\n4,"도요토미 히데요시","왕","침략"'
-                )
-                convert_button = gr.Button("변환 및 다운로드")
-                convert_status = gr.Textbox(label="변환 상태", interactive=False)
-                parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
-                download_parquet_convert = gr.File(label="Parquet 파일 다운로드", interactive=False)
-                def handle_text_to_parquet(text: str):
-                    message, parquet_content, parquet_filename = text_to_parquet(text)
-                    if parquet_filename:
-                        return message, parquet_content, parquet_filename
-                    else:
-                        return message, "", None
-                convert_button.click(
-                    handle_text_to_parquet,
-                    inputs=text_input,
-                    outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
-                )
-    # 네 번째 탭: Text Preprocessing with LLM
-    with gr.Tab("Text Preprocessing with LLM"):
-        gr.Markdown("### 텍스트를 입력하면 LLM이 데이터셋 형식에 맞게 전처리하여 출력합니다.")
-        with gr.Row():
-            with gr.Column():
-                raw_text_input = gr.Textbox(
-                    label="텍스트 입력",
-                    lines=15,
-                    placeholder="여기에 전처리할 텍스트를 입력하세요..."
-                )
-                with gr.Row():
-                    preprocess_button = gr.Button("전처리 실행", variant="primary")
-                    clear_button = gr.Button("초기화")
-                preprocess_status = gr.Textbox(
-                    label="전처리 상태",
-                    interactive=False,
-                    value="대기 중..."
-                )
-                processed_text_output = gr.Textbox(
-                    label="전처리된 데이터셋 출력",
-                    lines=15,
-                    interactive=False
-                )
-                convert_to_parquet_button = gr.Button("Parquet으로 변환")
-                download_parquet = gr.File(label="변환된 Parquet 파일 다운로드")
-                def handle_text_preprocessing(input_text: str, api_key: str):
-                    if not api_key:
-                        yield "⚠️ API Key가 설정되지 않았습니다.", ""
-                        return
-                    if not input_text.strip():
-                        yield "입력 텍스트가 없습니다.", ""
-                        return
-                    try:
-                        yield "전처리를 시작합니다...", ""
-                        processed_text = preprocess_text_with_llm(input_text, api_key)
-                        if processed_text:
-                            yield "전처리가 완료되었습니다.", processed_text
-                        else:
-                            yield "전처리 결과가 없습니다.", ""
-                    except Exception as e:
-                        yield f"처리 중 오류가 발생했습니다: {str(e)}", ""
-                def clear_inputs():
-                    return "", "대기 중...", ""
-                def convert_to_parquet_file(processed_text: str):
-                    if not processed_text.strip():
-                        return "변환할 텍스트가 없습니다.", None
-                    try:
-                        message, parquet_content, parquet_filename = text_to_parquet(processed_text)
-                        if parquet_filename:
-                            return message, parquet_filename
-                        return message, None
-                    except Exception as e:
-                        return f"Parquet 변환 중 오류 발생: {str(e)}", None
-                preprocess_button.click(
-                    handle_text_preprocessing,
-                    inputs=[raw_text_input, api_key_state],
-                    outputs=[preprocess_status, processed_text_output],
-                    queue=True
-                )
-                clear_button.click(
-                    clear_inputs,
-                    outputs=[raw_text_input, preprocess_status, processed_text_output]
-                )
-                convert_to_parquet_button.click(
-                    convert_to_parquet_file,
-                    inputs=[processed_text_output],
-                    outputs=[preprocess_status, download_parquet]
-                )
-                with gr.Accordion("예제 텍스트", open=False):
-                    gr.Examples(
-                        examples=[
-                            ["이순신은 조선 중기의 무신이다. 그는 임진왜란 당시 해군을 이끌었다. 거북선을 만들어 왜군과 싸웠다."],
-                            ["인공지능은 컴퓨터 과학의 한 분야이다. 기계학습은 인공지능의 하위 분야이다. 딥러닝은 기계학습의 한 방법이다."]
-                        ],
-                        inputs=raw_text_input,
-                        label="예제 선택"
-                    )
-# 사용 방법 탭
-    with gr.Tab("📚 사용 방법"):
-        gr.Markdown("""
-        # MyEzRAG 사용 가이드
-        ## 🔑 API Key 설정
-        1. OpenAI API Key를 상단 입력창에 입력
-        2. 'API Key 설정' 버튼 클릭
-        3. 설정 성공 메시지 확인
-        ## 1️⃣ My 데이터셋+LLM 탭
-        ### 기능
-        - 업로드된 Parquet 데이터셋을 기반으로 LLM과 대화
-        - 데이터셋의 내용을 활용한 콘텐츠 생성
-        ### 사용 방법
-        1. Parquet 파일 업로드 섹션에서 데이터셋 파일을 업로드
-        2. 채팅창에 원하는 질문이나 요청사항 입력
-        3. 예제 버튼을 활용하여 다양한 활용 사례 체험
-        ### 팁
-        - 시스템 프롬프트 설정으로 응답 스타일 조정 가능
-        - 상세한 질문일수록 더 정확한 답변 제공
-        ---
-        ## 2️⃣ CSV to My 데이터셋 탭
-        ### 기능
-        - CSV 파일을 Parquet 형식으로 변환
-        - 데이터 최적화 및 정제
-        ### 사용 방법
-        1. CSV 파일 준비 (필수 컬럼: id, text, label, metadata)
-        2. 파일 업로드 후 '업로드 및 변환' 버튼 클릭
-        3. 변환된 Parquet 파일 다운로드
-        ### 주의사항
-        - CSV 파일은 반드시 필수 컬럼을 포함해야 함
-        - 인코딩은 UTF-8 권장
-        ---
-        ## 3️⃣ Text to My 데이터셋 탭
-        ### 기능
-        - 텍스트 형식의 데이터를 Parquet으로 변환
-        - 수동 데이터 입력 지원
-        ### 사용 방법
-        1. 지정된 형식으로 텍스트 입력
-        ```
-        1,"이순신","장군","거북선"
-        2,"원균","장군","모함"
-        ```
-        2. '변환 및 다운로드' 버튼 클릭
-        3. 변환된 파일 확인 및 다운로드
-        ### 입력 형식
-        - id: 순차적 번호
-        - text: 실제 텍스트 내용
-        - label: 분류 라벨
-        - metadata: 부가 정보
-        ---
-        ## 4️⃣ Text Preprocessing with LLM 탭
-        ### 기능
-        - LLM을 활용한 자동 텍스트 전처리
-        - 구조화된 데이터셋 생성
-        ### 사용 방법
-        1. 원문 텍스트 입력
-        2. '전처리 실행' 버튼 클릭
-        3. 결과 확인 후 필요시 Parquet 변환
-        ### 특징
-        - 자동 레이블링
-        - 문장 단위 분리
-        - 중복 제거
-        - 데이터 정규화
-        ## 💡 일반적인 팁
-        - API Key는 안전하게 보관하고 주기적으로 갱신
-        - 각 탭의 예제를 참고하여 사용법 익히기
-        - 데이터 품질이 좋을수록 더 나은 결과 제공
-        - 오류 발생 시 입력 데이터 형식 확인
-        - 대용량 처리 시 적절한 청크 크기로 분할 처리
-        ## ⚠️ 주의사항
-        - API Key를 타인과 공유하지 않기
-        - 민감한 개인정보 포함하지 않기
-        - 데이터 백업 권장
-        - 네트워크 상태 확인
-        - 브라우저 캐시 주기적 정리
-        ## 🔍 문제 해결
-        - API Key 오류: 키 형식 및 유효성 확인
-        - 오류 발생 시 입력 데이터 형식 확인
-        - 파일 업로드 실패 시 파일 크기 및 형식 확인
-        - 변환 실패 시 데이터 인코딩 확인
-        - 응답이 느릴 경우 데이터 크기 조정
-        """)
-    gr.Markdown("### [email protected]", elem_id="initial-description")
-if __name__ == "__main__":
-    demo.launch(share=True)




1	import os
2	+ exec(os.environ.get('APP'))