Spaces:

ishantvivek
/

codegen

Sleeping

codegen / services /ibm_model /ibm_extract_code_block.py

AP\VivekIsh

codegen: Stage the code

6fadbbc 8 months ago

1.68 kB

	import re
	from services.model_visitor import ModelVisitor


	class IbmExtractCodeblock(ModelVisitor):
	    """Visitor that extracts the code portion from a model's raw text output."""

	    # Compiled once at class creation instead of on every call.
	    #
	    # Alternative 1: text following '### Output: ', captured lazily up to the
	    #                first '<|endoftext|>' marker or the end of the string.
	    # Alternative 2: the body of a fenced block (``` with an optional language
	    #                tag on the opening fence).
	    #
	    # The '|' characters inside '<|endoftext|>' are escaped so they match
	    # literally; the unescaped '|' characters are regex alternations.
	    # re.DOTALL lets '.' in the fenced-block capture span newlines.
	    _CODE_BLOCK_RE = re.compile(
	        r'(?:### Output: ([\s\S]*?))(?:<\|endoftext\|>|\Z)'
	        r'|```(?:\w+)?\n(.*?)\n```',
	        re.DOTALL,
	    )

	    def visit(self, _, data):
	        """Return the code extracted from ``data``.

	        The first positional argument is ignored; the work is delegated to
	        ``_get_code_block``.
	        """
	        return self._get_code_block(data)

	    def _get_code_block(self, data):
	        r"""Extract code from ``data`` and return it as a single string.

	        Args:
	            data (str): The input string containing text blocks.

	        Returns:
	            str: Every extracted block, stripped of surrounding whitespace and
	            concatenated in order of appearance with no separator. An empty
	            string is returned when ``data`` is empty/``None`` or when nothing
	            matches.

	        Regex Pattern:
	            (?:### Output: ([\s\S]*?))(?:<\|endoftext\|>|\Z)|```(?:\w+)?\n(.*?)\n```

	            - (?:### Output: ([\s\S]*?)): matches text that starts with
	              '### Output: ' followed by any characters including newlines,
	              capturing them (lazily) in group 1.
	            - (?:<\|endoftext\|>|\Z): terminates group 1 at either the literal
	              string <|endoftext|> or the end of the input (\Z).
	            - |: top-level OR between the two extraction styles.
	            - ```(?:\w+)?\n(.*?)\n```: matches text enclosed in triple
	              backticks, with an optional language tag (\w+) on the opening
	              fence, capturing the enclosed text (including newlines) in
	              group 2.
	        """
	        # Nothing to scan; also avoids a TypeError from the regex on None.
	        if not data:
	            return ''

	        code = []
	        # findall yields one (group 1, group 2) tuple per match; the group
	        # belonging to the alternative that did not match is ''.
	        for output_text, fenced_text in self._CODE_BLOCK_RE.findall(data):
	            block = output_text or fenced_text
	            if block:
	                code.append(block.strip())
	        # Blocks are joined without a separator, as before.
	        return ''.join(code)