giulio98 committed on
Commit
fbda58e
·
1 Parent(s): facca0d
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zst filter=lfs diff=lfs merge=lfs -text
33
  *tfevents* filter=lfs diff=lfs merge=lfs -text
34
  parser/my-languages.so filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zst filter=lfs diff=lfs merge=lfs -text
33
  *tfevents* filter=lfs diff=lfs merge=lfs -text
34
  parser/my-languages.so filter=lfs diff=lfs merge=lfs -text
35
+ parsercode/my-languages.so filter=lfs diff=lfs merge=lfs -text
__pycache__/bleu.cpython-37.pyc ADDED
Binary file (25.1 kB). View file
 
__pycache__/calc_code_bleu.cpython-37.pyc ADDED
Binary file (2.91 kB). View file
 
__pycache__/codebleu.cpython-37.pyc ADDED
Binary file (2.37 kB). View file
 
__pycache__/dataflow_match.cpython-37.pyc ADDED
Binary file (4.18 kB). View file
 
__pycache__/syntax_match.cpython-37.pyc ADDED
Binary file (2.13 kB). View file
 
__pycache__/utils.cpython-37.pyc ADDED
Binary file (3.34 kB). View file
 
__pycache__/weighted_ngram_match.cpython-37.pyc ADDED
Binary file (21.7 kB). View file
 
codebleu.py CHANGED
@@ -83,14 +83,6 @@ class CodeBLEU(evaluate.Metric):
83
  reference_urls=["http://path.to.reference.url/new_module"]
84
  )
85
 
86
- def _download_and_prepare(self, dl_manager):
87
- """Optional: download external resources useful to compute the scores"""
88
- # TODO: Download external resources if needed
89
- if self.config_name == "python":
90
- Language.build_library('./parser/my-languages.so',['tree-sitter-python'])
91
- elif self.config_name == "cpp":
92
- Language.build_library('./parser/my-languages.so',['tree-sitter-cpp'])
93
-
94
 
95
  def _compute(self, predictions, references, language="python", alpha=0.25, beta=0.25, gamma=0.25, theta=0.25):
96
 
 
83
  reference_urls=["http://path.to.reference.url/new_module"]
84
  )
85
 
 
 
 
 
 
 
 
 
86
 
87
  def _compute(self, predictions, references, language="python", alpha=0.25, beta=0.25, gamma=0.25, theta=0.25):
88
 
parser/tree-sitter-c-sharp DELETED
@@ -1 +0,0 @@
1
- Subproject commit 5b60f99545fea00a33bbfae5be956f684c4c69e2
 
 
parser/tree-sitter-cpp DELETED
@@ -1 +0,0 @@
1
- Subproject commit 5ead1e26c6ab71919db0f1880c46a278a93bc5ea
 
 
parser/tree-sitter-go DELETED
@@ -1 +0,0 @@
1
- Subproject commit 05900faa3cdb5d2d8c8bd5e77ee698487e0a8611
 
 
parser/tree-sitter-java DELETED
@@ -1 +0,0 @@
1
- Subproject commit 09d650def6cdf7f479f4b78f595e9ef5b58ce31e
 
 
parser/tree-sitter-javascript DELETED
@@ -1 +0,0 @@
1
- Subproject commit 936d976a782e75395d9b1c8c7c7bf4ba6fe0d86b
 
 
parser/tree-sitter-php DELETED
@@ -1 +0,0 @@
1
- Subproject commit ab2e72179ceb8bb0b249c8ac9162a148e911b3dc
 
 
parser/tree-sitter-python DELETED
@@ -1 +0,0 @@
1
- Subproject commit b14614e2144b8f9ee54deed5a24f3c6f51f9ffa8
 
 
parser/tree-sitter-ruby DELETED
@@ -1 +0,0 @@
1
- Subproject commit 252ca18be76b0918fb6b34c302292b6931876c25
 
 
{parser → parsercode}/DFG.py RENAMED
File without changes
{parser → parsercode}/__init__.py RENAMED
File without changes
parsercode/__pycache__/DFG.cpython-37.pyc ADDED
Binary file (33.9 kB). View file
 
parsercode/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (437 Bytes). View file
 
parsercode/__pycache__/utils.cpython-37.pyc ADDED
Binary file (2.43 kB). View file
 
{parser → parsercode}/build.py RENAMED
File without changes
{parser → parsercode}/build.sh RENAMED
File without changes
{parser → parsercode}/my-languages.so RENAMED
File without changes
{parser → parsercode}/utils.py RENAMED
File without changes
utils.py CHANGED
@@ -1,106 +1,106 @@
1
- # Natural Language Toolkit: Utility functions
2
- #
3
- # Copyright (C) 2001-2020 NLTK Project
4
- # Author: Steven Bird <[email protected]>
5
- # URL: <http://nltk.org/>
6
- # For license information, see LICENSE.TXT
7
-
8
- from itertools import chain
9
-
10
- def pad_sequence(
11
- sequence,
12
- n,
13
- pad_left=False,
14
- pad_right=False,
15
- left_pad_symbol=None,
16
- right_pad_symbol=None,
17
- ):
18
- """
19
- Returns a padded sequence of items before ngram extraction.
20
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
21
- ['<s>', 1, 2, 3, 4, 5, '</s>']
22
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
23
- ['<s>', 1, 2, 3, 4, 5]
24
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
25
- [1, 2, 3, 4, 5, '</s>']
26
- :param sequence: the source data to be padded
27
- :type sequence: sequence or iter
28
- :param n: the degree of the ngrams
29
- :type n: int
30
- :param pad_left: whether the ngrams should be left-padded
31
- :type pad_left: bool
32
- :param pad_right: whether the ngrams should be right-padded
33
- :type pad_right: bool
34
- :param left_pad_symbol: the symbol to use for left padding (default is None)
35
- :type left_pad_symbol: any
36
- :param right_pad_symbol: the symbol to use for right padding (default is None)
37
- :type right_pad_symbol: any
38
- :rtype: sequence or iter
39
- """
40
- sequence = iter(sequence)
41
- if pad_left:
42
- sequence = chain((left_pad_symbol,) * (n - 1), sequence)
43
- if pad_right:
44
- sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
45
- return sequence
46
-
47
-
48
- # add a flag to pad the sequence so we get peripheral ngrams?
49
-
50
-
51
- def ngrams(
52
- sequence,
53
- n,
54
- pad_left=False,
55
- pad_right=False,
56
- left_pad_symbol=None,
57
- right_pad_symbol=None,
58
- ):
59
- """
60
- Return the ngrams generated from a sequence of items, as an iterator.
61
- For example:
62
- >>> from nltk.util import ngrams
63
- >>> list(ngrams([1,2,3,4,5], 3))
64
- [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
65
- Wrap with list for a list version of this function. Set pad_left
66
- or pad_right to true in order to get additional ngrams:
67
- >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
68
- [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
69
- >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
70
- [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
71
- >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
72
- [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
73
- >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
74
- [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
75
- :param sequence: the source data to be converted into ngrams
76
- :type sequence: sequence or iter
77
- :param n: the degree of the ngrams
78
- :type n: int
79
- :param pad_left: whether the ngrams should be left-padded
80
- :type pad_left: bool
81
- :param pad_right: whether the ngrams should be right-padded
82
- :type pad_right: bool
83
- :param left_pad_symbol: the symbol to use for left padding (default is None)
84
- :type left_pad_symbol: any
85
- :param right_pad_symbol: the symbol to use for right padding (default is None)
86
- :type right_pad_symbol: any
87
- :rtype: sequence or iter
88
- """
89
- sequence = pad_sequence(
90
- sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
91
- )
92
-
93
- history = []
94
- while n > 1:
95
- # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
96
- try:
97
- next_item = next(sequence)
98
- except StopIteration:
99
- # no more data, terminate the generator
100
- return
101
- history.append(next_item)
102
- n -= 1
103
- for item in sequence:
104
- history.append(item)
105
- yield tuple(history)
106
  del history[0]
 
1
+ # Natural Language Toolkit: Utility functions
2
+ #
3
+ # Copyright (C) 2001-2020 NLTK Project
4
+ # Author: Steven Bird <[email protected]>
5
+ # URL: <http://nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ from itertools import chain
9
+
10
+ def pad_sequence(
11
+ sequence,
12
+ n,
13
+ pad_left=False,
14
+ pad_right=False,
15
+ left_pad_symbol=None,
16
+ right_pad_symbol=None,
17
+ ):
18
+ """
19
+ Returns a padded sequence of items before ngram extraction.
20
+ >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
21
+ ['<s>', 1, 2, 3, 4, 5, '</s>']
22
+ >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
23
+ ['<s>', 1, 2, 3, 4, 5]
24
+ >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
25
+ [1, 2, 3, 4, 5, '</s>']
26
+ :param sequence: the source data to be padded
27
+ :type sequence: sequence or iter
28
+ :param n: the degree of the ngrams
29
+ :type n: int
30
+ :param pad_left: whether the ngrams should be left-padded
31
+ :type pad_left: bool
32
+ :param pad_right: whether the ngrams should be right-padded
33
+ :type pad_right: bool
34
+ :param left_pad_symbol: the symbol to use for left padding (default is None)
35
+ :type left_pad_symbol: any
36
+ :param right_pad_symbol: the symbol to use for right padding (default is None)
37
+ :type right_pad_symbol: any
38
+ :rtype: sequence or iter
39
+ """
40
+ sequence = iter(sequence)
41
+ if pad_left:
42
+ sequence = chain((left_pad_symbol,) * (n - 1), sequence)
43
+ if pad_right:
44
+ sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
45
+ return sequence
46
+
47
+
48
+ # add a flag to pad the sequence so we get peripheral ngrams?
49
+
50
+
51
+ def ngrams(
52
+ sequence,
53
+ n,
54
+ pad_left=False,
55
+ pad_right=False,
56
+ left_pad_symbol=None,
57
+ right_pad_symbol=None,
58
+ ):
59
+ """
60
+ Return the ngrams generated from a sequence of items, as an iterator.
61
+ For example:
62
+ >>> from nltk.util import ngrams
63
+ >>> list(ngrams([1,2,3,4,5], 3))
64
+ [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
65
+ Wrap with list for a list version of this function. Set pad_left
66
+ or pad_right to true in order to get additional ngrams:
67
+ >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
68
+ [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
69
+ >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
70
+ [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
71
+ >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
72
+ [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
73
+ >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
74
+ [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
75
+ :param sequence: the source data to be converted into ngrams
76
+ :type sequence: sequence or iter
77
+ :param n: the degree of the ngrams
78
+ :type n: int
79
+ :param pad_left: whether the ngrams should be left-padded
80
+ :type pad_left: bool
81
+ :param pad_right: whether the ngrams should be right-padded
82
+ :type pad_right: bool
83
+ :param left_pad_symbol: the symbol to use for left padding (default is None)
84
+ :type left_pad_symbol: any
85
+ :param right_pad_symbol: the symbol to use for right padding (default is None)
86
+ :type right_pad_symbol: any
87
+ :rtype: sequence or iter
88
+ """
89
+ sequence = pad_sequence(
90
+ sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
91
+ )
92
+
93
+ history = []
94
+ while n > 1:
95
+ # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
96
+ try:
97
+ next_item = next(sequence)
98
+ except StopIteration:
99
+ # no more data, terminate the generator
100
+ return
101
+ history.append(next_item)
102
+ n -= 1
103
+ for item in sequence:
104
+ history.append(item)
105
+ yield tuple(history)
106
  del history[0]