wolfrage89
committed on
Commit
·
1e9e906
1
Parent(s):
7891a3f
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## RoBERTa-based NER
|
2 |
+
This model takes in a news article and labels 3 entity types: [ORG, SEG, SEGNUM]. It was trained on Reuters news articles.
|
3 |
+
|
4 |
+
## Try it out on Hugging Face Spaces
|
5 |
+
https://huggingface.co/spaces/wolfrage89/company_segments_ner
|
6 |
+
|
7 |
+
## Colab sample notebook
|
8 |
+
https://colab.research.google.com/drive/165utMQzYVAX7-aQjWjpmPHwHpdKTaHBa?usp=sharing
|
9 |
+
|
10 |
+
## How to use
|
11 |
+
```python
|
12 |
+
|
13 |
+
from transformers import pipeline
|
14 |
+
|
15 |
+
# Minimum code
sentence = """Exxon Mobil Corporation is engaged in energy business. The Company is engaged in the exploration, production, trade, transportation and sale of crude oil and natural gas, and the manufacture, transportation and sale of crude oil, natural gas, petroleum products, petrochemicals and a range of specialty products. The Company's segments include Upstream, Downstream, Chemical, and Corporate and Financing. The Upstream segment operates to explore for and produce crude oil and natural gas. The Downstream manufactures, trades and sells petroleum products. The refining and supply operations consists of a global network of manufacturing plants, transportation systems, and distribution centers that provide a range of fuels, lubricants and other products and feedstocks to its customers around the world. The Chemical segment manufactures and sells petrochemicals. The Chemical business supplies olefins, polyolefins, aromatics, and a variety of other petrochemicals."""

# Load the 'ner' pipeline for the wolfrage89/company_segment_ner model.
model = pipeline('ner', "wolfrage89/company_segment_ner")
# One dict per tagged token; see the sample output below.
model_output = model(sentence)

print(model_output)
# [{'entity': 'B-ORG', 'score': 0.99996805, 'index': 1, 'word': 'Ex', 'start': 0, 'end': 2}, {'entity': 'I-ORG', 'score': 0.99971646, 'index': 2, 'word': 'xon', 'start': 2, 'end': 5}, ....]
|
24 |
+
|
25 |
+
|
26 |
+
# Sample helper function, if you want to use it
|
27 |
+
def ner_prediction(model, sentence):
    """Merge token-level NER predictions into whole entities.

    Args:
        model: Callable (for example a ``transformers`` ``'ner'`` pipeline)
            that takes ``sentence`` and returns an iterable of dicts with the
            keys ``'entity'``, ``'word'``, ``'start'`` and ``'end'``.
        sentence: Text handed unchanged to ``model``.

    Returns:
        A list of ``(entity_class, text, start, end)`` tuples in the order the
        entities appear in the model output. ``start`` is the ``'start'`` of
        the entity's first token and ``end`` is the ``'end'`` of its last
        token. Entities whose accumulated text is empty are dropped.

    Notes:
        A token whose label starts with ``"B"`` opens a new entity; any other
        token extends the entity that is currently open. A continuation token
        that arrives while no entity is open starts a new entity itself, so
        it keeps its own class and start offset instead of being reported
        with class ``None`` and start ``0``.
    """
    entity_map = {
        "B-ORG": "ORG",
        "B-SEG": "SEG",
        "B-SEGNUM": "SEGNUM",
    }
    results = []
    pieces = []  # text fragments of the entity currently being built
    current_class = None
    start = 0
    end = 0

    for item in model(sentence):
        label = item['entity']
        word = item['word']

        if label.startswith("B") or current_class is None:
            # Close the previous entity (if any) before opening a new one.
            text = "".join(pieces)
            if text:
                results.append((current_class, text, start, end))

            # Labels missing from the map (including an orphan "I-XXX" tag)
            # fall back to the label with its "B-"/"I-" prefix removed.
            current_class = entity_map.get(label, label.split("-", 1)[-1])
            pieces = [word.lstrip("Ġ")]
            start = item['start']
            end = item['end']
        else:
            # A leading "Ġ" (the marker RoBERTa-style tokenizers put on a
            # token that follows a space) means a new word begins here.
            if word.startswith("Ġ"):
                pieces.append(" ")
                word = word.lstrip("Ġ")
            pieces.append(word)
            end = item['end']

    # Emit the entity still open when the model output ends.
    text = "".join(pieces)
    if text:
        results.append((current_class, text, start, end))

    return results
|
62 |
+
|
63 |
+
```
|