Spaces:
Sleeping
Sleeping
import re | |
from services.model_visitor import ModelVisitor | |
class IbmExtractCodeblock(ModelVisitor):
    """Visitor that extracts the code portion of a raw model response."""

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Unused; accepted only to satisfy the visitor call signature.
            data (str): Raw text to search for code.

        Returns:
            str: The extracted code, or an empty string when nothing matches.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        r"""
        Extract code snippets from ``data`` and return them as one string.

        Args:
            data (str): The input string containing text blocks.

        Returns:
            str: Every extracted snippet, stripped of surrounding whitespace
            and separated by a single newline. Empty string if nothing in
            ``data`` matches the pattern.

        Regex Pattern:
            (?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```

            - (?:### Output: ([\s\S]*?)): matches text that starts with
              '### Output: ' and lazily captures everything after it,
              newlines included (group 1).
            - (?:\<\|endoftext\|\>|\Z): group 1 stops at the literal
              <|endoftext|> token or at the end of the string (\Z).
            - |: OR operator; the regex matches either the pattern before
              or the pattern after it.
            - ```(?:\w+)?\n(.*?)\n```: matches a block fenced by triple
              backticks, where the opening fence may be followed by word
              characters (\w+) before the newline, and captures the lines
              between the fences (group 2). re.DOTALL lets '.' span
              newlines.
        """
        pattern = r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```'

        snippets = []
        # findall yields one (group 1, group 2) tuple per match; the group
        # belonging to the alternative that did not match is ''.
        for output_body, fenced_body in re.findall(pattern, data, re.DOTALL):
            snippet = (output_body or fenced_body).strip()
            # A whitespace-only capture strips down to '' and carries no
            # code, so it must not contribute a blank line to the result.
            if snippet:
                snippets.append(snippet)

        # Separate snippets with a newline: joining on '' would glue the
        # last line of one snippet to the first line of the next.
        return '\n'.join(snippets)