Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,177 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipe
|
|
2 |
import streamlit as st
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
@st.cache_data
|
6 |
def prepare_model():
|
7 |
"""
|
@@ -37,7 +208,13 @@ def format_predictions(preds) -> str:
|
|
37 |
"""
|
38 |
out = ""
|
39 |
for i, item in enumerate(preds):
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
return out
|
42 |
|
43 |
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
|
5 |
+
# Maps arXiv category identifiers (archive or archive.subject-class) to a
# lowercase, human-readable description shown next to each predicted label.
# Source: https://arxiv.org/category_taxonomy
arxiv_categories = {
    "cs": "computer science",
    "cs.AI": "artificial intelligence",
    "cs.AR": "hardware architecture",
    "cs.CC": "computational complexity",
    "cs.CE": "computational engineering, finance, and science",
    "cs.CG": "computational geometry",
    "cs.CL": "computation and language",
    "cs.CR": "cryptography and security",
    "cs.CV": "computer vision and pattern recognition",
    "cs.CY": "computers and society",
    "cs.DB": "databases",
    "cs.DC": "distributed, parallel, and cluster computing",
    "cs.DL": "digital libraries",
    "cs.DM": "discrete mathematics",
    "cs.DS": "data structures and algorithms",
    "cs.ET": "emerging technologies",
    "cs.FL": "formal languages and automata theory",
    "cs.GL": "general literature",
    "cs.GR": "graphics",
    "cs.GT": "computer science and game theory",
    "cs.HC": "human-computer interaction",
    "cs.IR": "information retrieval",
    "cs.IT": "information theory",
    "cs.LG": "machine learning",
    "cs.LO": "logic in computer science",
    "cs.MA": "multiagent systems",
    "cs.MM": "multimedia",
    "cs.MS": "mathematical software",
    "cs.NA": "numerical analysis",
    "cs.NE": "neural and evolutionary computing",
    "cs.NI": "networking and internet architecture",
    "cs.OH": "other computer science",
    "cs.OS": "operating systems",
    "cs.PF": "performance",
    "cs.PL": "programming languages",
    "cs.RO": "robotics",
    "cs.SC": "symbolic computation",
    "cs.SD": "sound",
    "cs.SE": "software engineering",
    "cs.SI": "social and information networks",
    "cs.SY": "systems and control",
    "econ": "economics",
    "econ.EM": "econometrics",
    "econ.GN": "general economics",
    "econ.TH": "theoretical economics",
    "eess": "electrical engineering and systems science",
    "eess.AS": "audio and speech processing",
    "eess.IV": "image and video processing",
    "eess.SP": "signal processing",
    "eess.SY": "systems and control",
    "math": "mathematics",
    "math.AC": "commutative algebra",
    "math.AG": "algebraic geometry",
    "math.AP": "analysis of PDEs",
    "math.AT": "algebraic topology",
    "math.CA": "classical analysis and ODEs",
    "math.CO": "combinatorics",
    "math.CT": "category theory",
    "math.CV": "complex variables",
    "math.DG": "differential geometry",
    "math.DS": "dynamical systems",
    "math.FA": "functional analysis",
    "math.GM": "general mathematics",
    "math.GN": "general topology",
    "math.GR": "group theory",
    "math.GT": "geometric topology",
    "math.HO": "history and overview",
    "math.IT": "information theory",
    "math.KT": "k-theory and homology",
    "math.LO": "logic",
    "math.MG": "metric geometry",
    "math.MP": "mathematical physics",
    "math.NA": "numerical analysis",
    "math.NT": "number theory",
    "math.OA": "operator algebras",
    "math.OC": "optimization and control",
    "math.PR": "probability",
    "math.QA": "quantum algebra",
    "math.RA": "rings and algebras",
    "math.RT": "representation theory",
    "math.SG": "symplectic geometry",
    "math.SP": "spectral theory",
    "math.ST": "statistics theory",
    "astro-ph": "astrophysics",
    "astro-ph.CO": "cosmology and nongalactic astrophysics",
    "astro-ph.EP": "earth and planetary astrophysics",
    "astro-ph.GA": "astrophysics of galaxies",
    "astro-ph.HE": "high energy astrophysical phenomena",
    "astro-ph.IM": "instrumentation and methods for astrophysics",
    "astro-ph.SR": "solar and stellar astrophysics",
    "cond-mat": "condensed matter",
    "cond-mat.dis-nn": "disordered systems and neural networks",
    "cond-mat.mes-hall": "mesoscale and nanoscale physics",
    "cond-mat.mtrl-sci": "materials science",
    "cond-mat.other": "other condensed matter",
    "cond-mat.quant-gas": "quantum gases",
    "cond-mat.soft": "soft condensed matter",
    "cond-mat.stat-mech": "statistical mechanics",
    "cond-mat.str-el": "strongly correlated electrons",
    "cond-mat.supr-con": "superconductivity",
    # NOTE(review): "gr" is not an arXiv archive identifier; it looks like an
    # artifact of parsing "gr-qc". Kept so existing lookups behave as before --
    # confirm whether the model can ever emit this label, then drop it.
    "gr": "general relativity and quantum cosmology",
    "gr-qc": "general relativity and quantum cosmology",
    "hep-ex": "high energy physics - experiment",
    "hep-lat": "high energy physics - lattice",
    "hep-ph": "high energy physics - phenomenology",
    "hep-th": "high energy physics - theory",
    "math-ph": "mathematical physics",
    "nlin": "nonlinear sciences",
    "nlin.AO": "adaptation and self-organizing systems",
    "nlin.CD": "chaotic dynamics",
    "nlin.CG": "cellular automata and lattice gases",
    "nlin.PS": "pattern formation and solitons",
    "nlin.SI": "exactly solvable and integrable systems",
    "nucl-ex": "nuclear experiment",
    "nucl-th": "nuclear theory",
    "physics": "physics",
    "physics.acc-ph": "accelerator physics",
    "physics.ao-ph": "atmospheric and oceanic physics",
    "physics.app-ph": "applied physics",
    "physics.atm-clus": "atomic and molecular clusters",
    "physics.atom-ph": "atomic physics",
    "physics.bio-ph": "biological physics",
    "physics.chem-ph": "chemical physics",
    "physics.class-ph": "classical physics",
    "physics.comp-ph": "computational physics",
    "physics.data-an": "data analysis, statistics and probability",
    "physics.ed-ph": "physics education",
    "physics.flu-dyn": "fluid dynamics",
    "physics.gen-ph": "general physics",
    "physics.geo-ph": "geophysics",
    "physics.hist-ph": "history and philosophy of physics",
    "physics.ins-det": "instrumentation and detectors",
    "physics.med-ph": "medical physics",
    "physics.optics": "optics",
    "physics.plasm-ph": "plasma physics",
    "physics.pop-ph": "popular physics",
    "physics.soc-ph": "physics and society",
    "physics.space-ph": "space physics",
    "quant-ph": "quantum physics",
    "q-bio": "quantitative biology",
    "q-bio.BM": "biomolecules",
    "q-bio.CB": "cell behavior",
    "q-bio.GN": "genomics",
    "q-bio.MN": "molecular networks",
    "q-bio.NC": "neurons and cognition",
    "q-bio.OT": "other quantitative biology",
    "q-bio.PE": "populations and evolution",
    "q-bio.QM": "quantitative methods",
    "q-bio.SC": "subcellular processes",
    "q-bio.TO": "tissues and organs",
    "q-fin": "quantitative finance",
    "q-fin.CP": "computational finance",
    "q-fin.EC": "economics",
    "q-fin.GN": "general finance",
    "q-fin.MF": "mathematical finance",
    "q-fin.PM": "portfolio management",
    "q-fin.PR": "pricing of securities",
    "q-fin.RM": "risk management",
    "q-fin.ST": "statistical finance",
    "q-fin.TR": "trading and market microstructure",
    "stat": "statistics",
    "stat.AP": "applications",
    "stat.CO": "computation",
    "stat.ME": "methodology",
    "stat.ML": "machine learning",
    "stat.OT": "other statistics",
    "stat.TH": "statistics theory",
}
|
174 |
+
|
175 |
+
|
176 |
@st.cache_data
|
177 |
def prepare_model():
|
178 |
"""
|
|
|
208 |
"""
|
209 |
out = ""
|
210 |
for i, item in enumerate(preds):
|
211 |
+
label = item["label"]
|
212 |
+
score = item["score"]
|
213 |
+
description = arxiv_categories.get(label, "")
|
214 |
+
if description != "":
|
215 |
+
out += f"{i+1}. **{item['label']}** ({description}) *(score {item['score']:.2f})*\n"
|
216 |
+
else:
|
217 |
+
out += f"{i+1}. **{item['label']}** *(score {item['score']:.2f})*\n"
|
218 |
return out
|
219 |
|
220 |
|