Daemontatox
committed on
Update app.py
app.py CHANGED
@@ -1,11 +1,11 @@
 import spaces
 import subprocess
 
-
-
-
-
-
+subprocess.run(
+    'pip install flash-attn --no-build-isolation',
+    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
+    shell=True
+)
 
 
 import os
@@ -31,6 +31,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBy
 from langchain_cerebras import ChatCerebras
 
 
+
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -145,10 +146,7 @@ llm = ChatOpenAI(
     timeout=None,
     max_retries=2,
     streaming=True,
-
-
 
-
 )
 
 
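For context: FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE makes the flash-attn install skip compiling its CUDA extension at pip-install time, a common workaround on Spaces where no GPU is visible while the app starts up. The sketch below shows how such a startup install is typically consumed later in app.py; the model ID and loading arguments are illustrative assumptions and are not part of this commit.

# Sketch only, assuming app.py loads a causal LM with transformers further down.
# MODEL_ID is a placeholder; the actual model is not shown in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-org/your-model"  # hypothetical

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,               # half precision is typical when flash attention is enabled
    attn_implementation="flash_attention_2",  # routes attention through the flash-attn kernels installed at startup
    device_map="auto",
)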