rphrp1985 committed
Commit 9bfe2be · verified · 1 Parent(s): 5097018

Update app.py

Files changed (1):
  app.py +38 -27
app.py CHANGED
@@ -45,50 +45,61 @@ token=os.getenv('token')
 print('token = ',token)

 from transformers import AutoModelForCausalLM, AutoTokenizer
+import transformers

 # model_id = "mistralai/Mistral-7B-v0.3"

-model_id = "microsoft/Phi-3-medium-4k-instruct"
-model_id = "microsoft/phi-4"
+# model_id = "microsoft/Phi-3-medium-4k-instruct"
+# model_id = "microsoft/phi-4"

-# model_id = "Qwen/Qwen2-7B-Instruct"
+# # model_id = "Qwen/Qwen2-7B-Instruct"


-tokenizer = AutoTokenizer.from_pretrained(
-# model_id
-model_id,
-# use_fast=False
-token= token,
-trust_remote_code=True)
+# tokenizer = AutoTokenizer.from_pretrained(
+# # model_id
+# model_id,
+# # use_fast=False
+# token= token,
+# trust_remote_code=True)


-accelerator = Accelerator()
+# accelerator = Accelerator()

-model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
-# torch_dtype= torch.uint8,
-torch_dtype=torch.bfloat16,
-# load_in_8bit=True,
-# # # torch_dtype=torch.fl,
-attn_implementation="flash_attention_2",
-low_cpu_mem_usage=True,
-trust_remote_code=True,
-device_map='cuda',
-# device_map=accelerator.device_map,
+# model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
+# # torch_dtype= torch.uint8,
+# torch_dtype=torch.bfloat16,
+# # load_in_8bit=True,
+# # # # torch_dtype=torch.fl,
+# attn_implementation="flash_attention_2",
+# low_cpu_mem_usage=True,
+# trust_remote_code=True,
+# device_map='cuda',
+# # device_map=accelerator.device_map,

-)
+# )





-#
-model = accelerator.prepare(model)
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# #
+# model = accelerator.prepare(model)
+# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

-pipe = pipeline(
+# pipe = pipeline(
+# "text-generation",
+# model=model,
+# tokenizer=tokenizer,
+# )
+
+
+
+
+pipeline = transformers.pipeline(
 "text-generation",
-model=model,
-tokenizer=tokenizer,
+model="microsoft/phi-4",
+model_kwargs={"torch_dtype": "auto"},
+device_map="auto",
 )

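
For context, a minimal usage sketch of the transformers.pipeline call this commit switches to, in place of the manually loaded model and tokenizer. The chat-style messages and max_new_tokens value below are illustrative assumptions, not part of the commit; only the construction call mirrors the added lines in app.py.

# Usage sketch (assumption: not part of the commit) for the pipeline
# constructed in the new app.py.
import transformers

pipe = transformers.pipeline(
    "text-generation",
    model="microsoft/phi-4",
    model_kwargs={"torch_dtype": "auto"},  # let transformers pick the dtype
    device_map="auto",                     # place weights on available devices
)

# Recent transformers text-generation pipelines accept chat-style message
# lists for instruct models; the prompt here is illustrative.
messages = [
    {"role": "user", "content": "Briefly explain what device_map='auto' does."},
]
outputs = pipe(messages, max_new_tokens=128)
print(outputs[0]["generated_text"])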