alexkueck committed on
Commit
dbcd7ed
·
1 Parent(s): 28a1b8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -74,14 +74,16 @@ splittet = False
74
  print ("Inf.Client")
75
  #client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
76
  #client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
77
- client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
 
 
78
 
79
  ##############################################
80
  # tokenizer for generating prompt
81
  ##############################################
82
  print ("Tokenizer")
83
  #tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
84
- tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
85
  #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
86
  #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
87
 
@@ -279,22 +281,21 @@ def generate(text, history, rag_option, model_option, temperature=0.5, max_new_
279
 
280
  #Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur promt und history)
281
  #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
282
- payload = tokenizer.apply_chat_template(prompt,tokenize=False)
283
- result = client.text_generation(
284
- payload,
285
- do_sample=True,
286
- return_full_text=False,
287
- max_new_tokens=2048,
288
- top_p=0.9,
289
- temperature=0.6,
290
- )
291
  except Exception as e:
292
  raise gr.Error(e)
293
 
 
 
294
  #Antwort als Stream ausgeben...
295
- for i in range(len(result)):
296
- time.sleep(0.05)
297
- yield result[: i+1]
298
 
299
 
300
 
 
74
  print ("Inf.Client")
75
  #client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
76
  #client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
77
+ #Inference mit Authorisation:
78
+ API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
79
+ HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
80
 
81
  ##############################################
82
  # tokenizer for generating prompt
83
  ##############################################
84
  print ("Tokenizer")
85
  #tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
86
+ #tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
87
  #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
88
  #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
89
 
 
281
 
282
  #Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur promt und history)
283
  #payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
284
+ #Für LLAMA:
285
+ #payload = tokenizer.apply_chat_template(prompt,tokenize=False)
286
+ #result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
287
+ #inference allg:
288
+ result= requests.post(API_URL, headers=HEADERS, json=prompt)
289
+
 
 
 
290
  except Exception as e:
291
  raise gr.Error(e)
292
 
293
+ return result.json()
294
+
295
  #Antwort als Stream ausgeben...
296
+ #for i in range(len(result)):
297
+ #time.sleep(0.05)
298
+ #yield result[: i+1]
299
 
300
 
301