import re

from services.model_visitor import ModelVisitor


class IbmExtractCodeblock(ModelVisitor):
    """Visitor that extracts the code portion of a raw model response.

    Two response layouts are recognised (see ``_CODE_BLOCK_PATTERN``):
    a ``### Output: `` section and Markdown-style triple-backtick fences.
    """

    # Compiled once at class creation; the pattern never changes between calls.
    # re.DOTALL lets ``.`` in the fenced alternative span multiple lines.
    _CODE_BLOCK_PATTERN = re.compile(
        # Alternative 1: '### Output: ' followed by the shortest run of text
        # (group 1) that ends at '<|endoftext|>' or at the end of the input.
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)'
        # Alternative 2: an opening fence with an optional word-character
        # language tag, a newline, the body (group 2), a newline and the
        # closing fence.
        r'|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Ignored; present only to satisfy the visitor call signature.
            data (str): Raw text to extract code from.

        Returns:
            str: The extracted code, or an empty string when nothing matches.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        r"""Extract every code snippet found in ``data`` and return them as one string.

        The input is scanned left to right for non-overlapping matches of
        ``_CODE_BLOCK_PATTERN``::

            (?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```

        - ``(?:### Output: ([\s\S]*?))``: matches the literal ``### Output: ``
          (including the trailing space) and lazily captures everything after
          it, newlines included.
        - ``(?:\<\|endoftext\|\>|\Z)``: terminates that capture at the first
          ``<|endoftext|>`` marker or, failing that, at the end of the string.
        - ``|``: alternation; at each position the ``### Output: `` form is
          tried before the fenced form.
        - ```` ```(?:\w+)?\n(.*?)\n``` ````: matches a triple-backtick fence
          whose opening may be followed by a language tag made of word
          characters, and captures the lines between the fences.

        Args:
            data (str): The input string containing zero or more code blocks.

        Returns:
            str: Each captured snippet with surrounding whitespace stripped,
            joined by a single newline in order of appearance. Empty string if
            nothing was captured.
        """
        snippets = []
        for output_section, fenced_block in self._CODE_BLOCK_PATTERN.findall(data):
            # Only one of the two groups is populated per match, because they
            # belong to different alternatives of the pattern.
            snippet = (output_section or fenced_block).strip()
            # Skip captures that are empty after stripping so they do not
            # contribute blank lines to the joined result.
            if snippet:
                snippets.append(snippet)
        # Separate snippets with a newline; joining on '' would glue the last
        # line of one snippet to the first line of the next.
        return '\n'.join(snippets)