codegen / services /ibm_model /ibm_extract_code_block.py
AP\VivekIsh
codegen: Stage the code
6fadbbc
raw
history blame
1.68 kB
import re
from services.model_visitor import ModelVisitor
class IbmExtractCodeblock(ModelVisitor):
    r"""Visitor that extracts the code portion from a raw text response.

    The response is scanned for two layouts: a ``### Output: `` section
    (terminated by ``<|endoftext|>`` or the end of the string) and Markdown
    fenced code blocks. All extracted snippets are returned as one string.

    NOTE(review): the contract of ``ModelVisitor.visit`` is defined outside
    this file; the meaning of the ignored first argument should be confirmed
    against the base class.
    """

    # Compiled once at class-definition time so repeated visits do not have
    # to go through the ``re`` module's pattern cache lookup on every call.
    # re.DOTALL makes '.' match newlines, which the fenced-block branch
    # relies on to capture multi-line code.
    _CODE_BLOCK_RE = re.compile(
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Ignored by this visitor.
            data (str): The text to extract code from.

        Returns:
            str: The extracted code, or an empty string if nothing matched.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        r"""
        Extracts code blocks from the input string based on a specific pattern.

        Args:
            data (str): The input string containing text blocks. ``None`` or an
                empty string yields an empty result.

        Returns:
            str: The extracted snippets, each stripped of surrounding
            whitespace and separated by a single newline. Empty string when
            nothing matches.

        Regex Pattern:
            (?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```

            - (?:### Output: ([\s\S]*?)): Matches the literal '### Output: '
              (including the trailing space) followed, lazily, by any
              characters including newlines, captured as group 1.
            - (?:\<\|endoftext\|\>|\Z): Terminates group 1 at either the
              literal string <|endoftext|> or the end of the string (\Z).
            - |: OR operator; the regex matches either the pattern before or
              the pattern after it. The '### Output: ' branch is tried first
              at each position, and once it matches it consumes everything up
              to its terminator, so fences inside that section are part of
              group 1 rather than separate matches.
            - ```(?:\w+)?\n(.*?)\n```: Matches a block enclosed in triple
              backticks, where the opening fence may be followed by an
              optional language tag (\w+). The content between the fence
              lines, including newlines, is captured as group 2.
        """
        if not data:
            # Nothing to scan; avoids a TypeError from the regex on None.
            return ''

        snippets = []
        for output_text, fenced_text in self._CODE_BLOCK_RE.findall(data):
            # findall yields one (group 1, group 2) tuple per match; the group
            # belonging to the branch that did not match is ''.
            snippet = (output_text or fenced_text).strip()
            if snippet:
                snippets.append(snippet)

        # Join with a newline: concatenating with '' would fuse the last line
        # of one snippet with the first line of the next.
        return '\n'.join(snippets)