Add system features to UI including kernel version

#2
by jodh-intel - opened
Files changed (2) hide show
  1. app.py +172 -8
  2. requirements.txt +13 -2
app.py CHANGED
@@ -1,35 +1,199 @@
1
  from transformers import AutoModel, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
2
  import gradio as gr
3
  import torch
 
 
 
 
 
 
 
 
 
4
 
5
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
6
 
7
- tokenizer = LlamaTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3", trust_remote_code=True)
8
- model = LlamaForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3", trust_remote_code=True).to(DEVICE)
 
 
 
 
 
 
 
9
  model = model.eval()
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def predict(input, history=None):
12
  if history is None:
13
  history = []
14
- new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors='pt')
 
 
15
  bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
16
- history = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id).tolist()
 
 
17
  # convert the tokens to text, and then split the responses into the right format
18
  response = tokenizer.decode(history[0]).split("<|endoftext|>")
19
- response = [(response[i], response[i+1]) for i in range(0, len(response)-1, 2)] # convert to tuples of list
 
 
20
  return response, history
21
 
22
 
23
  with gr.Blocks() as demo:
24
- gr.Markdown('''## Confidential HuggingFace Runner
25
- ''')
 
 
26
  state = gr.State([])
27
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
28
  with gr.Row():
29
  with gr.Column(scale=4):
30
- txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
 
 
31
  with gr.Column(scale=1):
32
  button = gr.Button("Generate")
33
  txt.submit(predict, [txt, state], [chatbot, state])
34
  button.click(predict, [txt, state], [chatbot, state])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  demo.queue().launch(share=True, server_name="0.0.0.0")
 
1
  from transformers import AutoModel, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM
2
  import gradio as gr
3
  import torch
4
+ import os
5
+ import io
6
+ import sys
7
+ import platform
8
+ import intel_extension_for_pytorch as ipex
9
+ import intel_extension_for_pytorch._C as ipex_core
10
+ from cpuinfo import get_cpu_info
11
+ from contextlib import redirect_stdout
12
+
13
 
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
+ ROOT = '/'
17
+ SELF_ROOT = '/proc/self/root'
18
+
19
+ tokenizer = LlamaTokenizer.from_pretrained(
20
+ "lmsys/vicuna-7b-v1.3", trust_remote_code=True
21
+ )
22
+ model = LlamaForCausalLM.from_pretrained(
23
+ "lmsys/vicuna-7b-v1.3", trust_remote_code=True
24
+ ).to(DEVICE)
25
  model = model.eval()
26
 
27
+
28
def in_chroot():
    '''
    Report whether the process appears to be running in a chroot environment.

    Stats ROOT and SELF_ROOT (in that order). If either path does not
    exist the program is terminated via sys.exit() with an error message.

    Returns True when the inode of ROOT is not one of the usual root inode
    numbers, or when it differs from the inode of SELF_ROOT; otherwise
    returns False.
    '''
    try:
        root_inode, self_inode = [
            os.stat(path).st_ino for path in (ROOT, SELF_ROOT)
        ]
    except FileNotFoundError as e:
        sys.exit(f"ERROR: Failed to stat: {e}")

    # Inode 2 is the root inode for most filesystems, while XFS uses 128.
    # Any other value for "/" is treated as a chroot.
    usual_root_inodes = (2, 128)
    if root_inode not in usual_root_inodes:
        return True

    return root_inode != self_inode
47
+
48
+
49
def get_features():
    '''
    Return a dictionary of all features.

    key: feature name.
    value: Boolean showing if feature available.

    The CPU-flag based entries are looked up in the "flags" field returned
    by get_cpu_info(). The two "AMX IPEX" entries compare the ISA levels
    reported by ipex_core against 'AMX'.
    '''
    # Use .get() rather than indexing so that a cpuinfo result without a
    # "flags" field yields "feature not available" instead of a KeyError.
    flags = set(get_cpu_info().get("flags") or ())

    # Feature name shown in the UI -> CPU flag that indicates it.
    flag_for_feature = {
        'VM': 'hypervisor',
        'TDX TD': 'tdx_guest',
        'AMX available': 'amx_tile',
        'AMX-BF16 available': 'amx_bf16',
        'AMX-INT8 available': 'amx_int8',
        'AVX-VNNI available': 'avx_vnni',
        'AVX512-VNNI available': 'avx512_vnni',
        'AVX512-FP16 available': 'avx512_fp16',
        'AVX512-BF16 available': 'avx512_bf16',
    }

    features = {
        name: flag in flags for name, flag in flag_for_feature.items()
    }

    # IPEX-reported ISA levels (insertion order matches the display order).
    features['AMX IPEX available'] = (
        ipex_core._get_highest_cpu_support_isa_level() == 'AMX'
    )
    features['AMX IPEX enabled'] = ipex_core._get_current_isa_level() == 'AMX'

    return features
80
+
81
+
82
def get_debug_details():
    '''
    Return a block of markdown text that shows useful debug
    information.

    The text is a collapsible `<details>` element containing a two-column
    table (CPU, kernel, Python, PyTorch, IPEX and oneDNN details, plus the
    result of in_chroot()).

    Raises OSError if '/proc/version' cannot be read.
    '''

    # ipex.version() prints to stdout, so redirect stdout to
    # capture the output.
    buffer = io.StringIO()

    with redirect_stdout(buffer):
        ipex.version()

    # Put the captured lines on one line. Splitting first means a trailing
    # newline does not leave a dangling ", " at the end.
    ipex_version_details = ", ".join(buffer.getvalue().splitlines())

    ipex_current_isa_level = ipex_core._get_current_isa_level()
    ipex_max_isa_level = ipex_core._get_highest_cpu_support_isa_level()

    ipex_env_var = os.getenv('ATEN_CPU_CAPABILITY')
    onednn_env_var = os.getenv('ONEDNN_MAX_CPU_ISA')

    with open('/proc/version', 'r', encoding='utf-8') as f:
        kernel_version = f.read().rstrip()

    in_chroot_result = in_chroot()

    # Use .get() so a missing cpuinfo field is shown as "unknown" / empty
    # rather than aborting with a KeyError.
    cpu_info = get_cpu_info()
    arch = cpu_info.get('arch', 'unknown')
    brand = cpu_info.get('brand_raw', 'unknown')
    flags = cpu_info.get('flags', [])

    # sys.version may contain a newline on some builds, which would break
    # the markdown table row, so collapse all whitespace runs to one space.
    python_version = " ".join(sys.version.split())

    # Note that rather than using `<details>`, we could use gradio.Accordion(),
    # but the markdown version is more visually compact.
    md = f"""
<details>
<summary>Click to show debug details</summary>

| Feature | Value |
|-|-|
| Arch | `{arch}` |
| CPU | `{brand}` |
| CPU flags | `{flags}` |
| Kernel | `{kernel_version}` |
| Python version | `{python_version}` (implementation: `{platform.python_implementation()}`) |
| Python version details | `{sys.version_info}` |
| PyTorch version | `{torch.__version__}` |
| IPEX version | `{ipex.ipex_version}` |
| IPEX CPU detected | `{ipex_core._has_cpu()}` |
| IPEX XPU detected | `{ipex_core._has_xpu()}` |
| IPEX version details | `{ipex_version_details}` |
| IPEX env var `ATEN_CPU_CAPABILITY` | `{ipex_env_var}` |
| IPEX current ISA level | `{ipex_current_isa_level}` |
| IPEX max ISA level | `{ipex_max_isa_level}` |
| oneDNN env var `ONEDNN_MAX_CPU_ISA` | `{onednn_env_var}` |
| in chroot | `{in_chroot_result}` |

</details>
"""

    return md
140
+
141
+
142
  def predict(input, history=None):
143
  if history is None:
144
  history = []
145
+ new_user_input_ids = tokenizer.encode(
146
+ input + tokenizer.eos_token, return_tensors='pt'
147
+ )
148
  bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
149
+ history = model.generate(
150
+ bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id
151
+ ).tolist()
152
  # convert the tokens to text, and then split the responses into the right format
153
  response = tokenizer.decode(history[0]).split("<|endoftext|>")
154
+ response = [
155
+ (response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)
156
+ ] # convert to tuples of list
157
  return response, history
158
 
159
 
160
  with gr.Blocks() as demo:
161
+ gr.Markdown(
162
+ '''## Confidential HuggingFace Runner
163
+ '''
164
+ )
165
  state = gr.State([])
166
  chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
167
  with gr.Row():
168
  with gr.Column(scale=4):
169
+ txt = gr.Textbox(
170
+ show_label=False, placeholder="Enter text and press enter"
171
+ ).style(container=False)
172
  with gr.Column(scale=1):
173
  button = gr.Button("Generate")
174
  txt.submit(predict, [txt, state], [chatbot, state])
175
  button.click(predict, [txt, state], [chatbot, state])
176
+
177
+ with gr.Row():
178
+ features_dict = get_features()
179
+
180
+ all_features = features_dict.keys()
181
+
182
+ # Get a list of feature names that are actually set/available
183
+ set_features = [key for key in features_dict if features_dict[key]]
184
+
185
+ gr.CheckboxGroup(
186
+ all_features,
187
+ label="Features",
188
+ # Make the boxes read-only
189
+ interactive=False,
190
+ # Specify which features were detected
191
+ value=set_features,
192
+ info="Features detected from environment",
193
+ )
194
+
195
+ with gr.Row():
196
+ debug_details = get_debug_details()
197
+ gr.Markdown(debug_details)
198
+
199
  demo.queue().launch(share=True, server_name="0.0.0.0")
requirements.txt CHANGED
@@ -1,6 +1,17 @@
1
- torch
 
 
 
 
 
 
2
  cpm_kernels
3
  icetk
4
  gradio==3.50.2
5
  accelerate
6
- git+https://github.com/huggingface/transformers
 
 
 
 
 
 
1
+ # For pytorch
2
+ --find-links https://download.pytorch.org/whl/torch_stable.html
3
+
4
+ # For ipex
5
+ --trusted-host pytorch-extension.intel.com
6
+ --extra-index-url http://pytorch-extension.intel.com/release-whl/stable/cpu/us/intel-extension-for-pytorchtorch
7
+
8
  cpm_kernels
9
  icetk
10
  gradio==3.50.2
11
  accelerate
12
+ git+https://github.com/huggingface/transformers
13
+ py-cpuinfo
14
+
15
+ # Versions must match
16
+ torch==2.3.0+cpu
17
+ intel-extension-for-pytorch==2.3.0