"""Gradio app that summarizes user text with Llama 3.1 8B Instruct."""

import os

import gradio as gr
import spaces
import torch
import transformers

# Hugging Face access token for the gated Llama weights. ``None`` when the
# variable is unset, in which case the library falls back to any locally
# cached login.
access_token = os.environ.get("HF_ACCESS_TOKEN")

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Instruction sent as the system turn of every conversation.
SYSTEM_PROMPT = "Summarize the following: "

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
    token=access_token,
)


@spaces.GPU
def generate_text(input_text):
    """Return a model-generated summary of *input_text*.

    Args:
        input_text: Text entered by the user in the Gradio textbox.

    Returns:
        The assistant's reply as a plain string, or an empty string when
        *input_text* is empty or only whitespace.
    """
    # Skip the model call entirely when there is nothing to summarize.
    if not input_text or not input_text.strip():
        return ""

    # Chat input must be a list of message dicts; the instruction goes in the
    # system turn and the user's text in its own user turn.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": input_text},
    ]
    output = pipeline(messages, max_new_tokens=256)

    # For chat input, "generated_text" holds the whole conversation; the last
    # message is the newly generated assistant reply.
    generated = output[0]["generated_text"]
    if isinstance(generated, str):
        # Defensive: some configurations return the completion as a string.
        return generated
    return generated[-1]["content"]


iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(placeholder="Enter prompt..."),
    outputs="text",
    title="LLaMA 3 8B Text Generation",
)

iface.launch(server_name="0.0.0.0", server_port=7860)