File size: 3,999 Bytes
22507c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116


import re
import json
import ast
from turtle import color
from pydantic import ValidationError
from termcolor import colored

def extract_code_block(text):
    """Parse brace-delimited Python literals found in fenced code blocks.

    Every span of the form ```` ```{...}``` ```` (optionally tagged ``json``)
    is evaluated with ``ast.literal_eval``.

    NOTE: a later definition of ``extract_code_block`` in this module rebinds
    the name, so this variant is only reachable if that one is removed.

    Args:
        text: Raw text that may contain fenced code blocks.

    Returns:
        A list with one evaluated literal per fenced block, or ``None`` when
        no fenced block is present or any block fails to evaluate.
    """
    code_block = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)
    if not code_block:
        return None

    try:
        return [ast.literal_eval(block) for block in code_block]
    except (SyntaxError, ValueError, TypeError):
        # Return None instead of an error string: callers test the result for
        # truthiness and then iterate it as a list of parsed blocks, so a
        # non-empty string would be mistaken for data. TypeError covers
        # literals such as ``{[1]: 2}`` (unhashable key).
        return None



def extract_code_block(text):
    """Decode every JSON object found inside fenced code blocks of *text*.

    A fenced block is three backticks, an optional ``json`` tag, a
    brace-delimited payload, and three closing backticks.

    Args:
        text: Raw text that may contain fenced code blocks.

    Returns:
        A list of decoded payloads, one per fenced block, in order of
        appearance. ``None`` if there is no fenced block or if any payload is
        not valid JSON (a single bad payload discards the whole result).
    """
    fenced_payloads = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)
    if not fenced_payloads:
        return None

    decoded = []
    for payload in fenced_payloads:
        try:
            decoded.append(json.loads(payload))
        except json.JSONDecodeError:
            # All-or-nothing: one malformed payload invalidates everything.
            return None
    return decoded

# def fallback_extract(text, expected_keys):
#     fallback_dict = {}
#     for i, key in enumerate(expected_keys):
#         match = re.search(rf'"{key}"\s*:\s*([^\s,]+)', text)
#         if match:
#             value = match.group(1).strip('"').strip(',')
#             if value.isdigit():
#                 fallback_dict[key] = int(value)
#             elif re.match(r'^\{.*\}$', value):  # Detect dictionary structure
#                 try:
#                     fallback_dict[key] = json.loads(value)
#                 except json.JSONDecodeError:
#                     fallback_dict[key] = value  # Leave it as a string if malformed
#             else:
#                 fallback_dict[key] = value
#         else:
#             fallback_dict[key] = None  # If the key is not found, set it to None
#     return fallback_dict

def _trim_fallback_value(raw):
    """Strip structural residue that trails a loosely captured value."""
    value = raw.strip()
    # The last key's value is captured up to end-of-text, so it can carry the
    # closing code fence of the surrounding block.
    if value.endswith('```'):
        value = value[:-3].rstrip()
    # Likewise drop closing braces that belong to the enclosing object rather
    # than to the value itself (more '}' than '{' in the captured span).
    while value.endswith('}') and value.count('}') > value.count('{'):
        value = value[:-1].rstrip()
    return value.rstrip(',').strip()


def _coerce_fallback_value(value):
    """Convert a trimmed textual value to int, bool, dict or plain string."""
    # isdecimal() rather than isdigit(): isdigit() accepts characters such as
    # superscript digits that int() rejects with ValueError.
    if value.isdecimal():
        return int(value)

    lowered = value.lower()
    if lowered in ('true', 'false'):
        return lowered == 'true'

    # Plain prefix/suffix checks so nested objects spanning several lines are
    # recognised as well.
    if value.startswith('{') and value.endswith('}'):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value  # Leave it as a string if malformed

    # Remove surrounding quotes if present
    return value.strip('"')


def fallback_extract(text, expected_keys):
    """Best-effort extraction of ``"key": value`` pairs from loose text.

    For each quoted key from *expected_keys* found in *text*, the value is the
    text up to the next expected quoted key or the end of the text. Trailing
    commas, enclosing-object braces and a closing code fence are removed, then
    the value is coerced: unsigned integers become ``int``, ``true``/``false``
    (any case) become ``bool``, brace-delimited JSON becomes a ``dict``, and
    anything else is kept as a string without surrounding double quotes.
    If a key occurs more than once, the last occurrence wins.

    Args:
        text: Raw text to scan.
        expected_keys: Iterable of key names to look for.

    Returns:
        A dict with an entry for every expected key; keys that were not found
        map to ``None``. An empty *expected_keys* yields an empty dict.
    """
    keys = list(expected_keys)
    fallback_dict = {}

    # With no keys the alternation would be empty and match `""` keys.
    if keys:
        alternation = '|'.join(re.escape(key) for key in keys)
        pattern = r'"({0})"\s*:\s*(.*?)(?="(?:{0})"|\Z)'.format(alternation)

        for match in re.finditer(pattern, text, re.DOTALL):
            key, raw_value = match.groups()
            fallback_dict[key] = _coerce_fallback_value(
                _trim_fallback_value(raw_value)
            )

    # Add any missing keys with None value
    for key in keys:
        fallback_dict.setdefault(key, None)

    return fallback_dict

# Main function to handle parsing with fallback
def parse_with_fallback(text, pydantic_class):
    """Build a *pydantic_class* instance from *text*, with a regex fallback.

    First tries the fenced JSON blocks returned by ``extract_code_block``.
    If every block validates, the instance built from the first block is
    returned. Otherwise the key/value pairs recovered by ``fallback_extract``
    are validated instead. Diagnostic output is printed along the way.

    Args:
        text: Raw text expected to contain the model's fields.
        pydantic_class: Pydantic model class used for validation.

    Returns:
        A validated *pydantic_class* instance, or ``None`` if neither the
        fenced blocks nor the fallback data pass validation.
    """
    # Extract expected keys from the Pydantic class. Pydantic v2 exposes
    # `model_fields` and deprecates `__fields__`; v1 only has `__fields__`.
    fields = getattr(pydantic_class, 'model_fields', None)
    if fields is None:
        fields = pydantic_class.__fields__
    expected_keys = list(fields)

    # First try to extract clean JSON blocks
    parsed_blocks = extract_code_block(text)

    if parsed_blocks:
        # Validate every block, but only the first one is returned
        try:
            classes = [pydantic_class(**block) for block in parsed_blocks]
        except ValidationError as e:
            print("Validation error:", e)
        else:
            print(colored('used code block', 'red'))
            print(colored('Got this: {0}'.format(classes[0]), 'red'))
            print(colored('from this: {0}'.format(text), 'cyan'))
            return classes[0]

    # Fallback to manually extracting key-value pairs
    fallback_data = fallback_extract(text, expected_keys)

    print(colored('used fallback', 'red'))
    print(colored('Got this: {0}'.format(fallback_data), 'red'))
    print(colored('from this: {0}'.format(text), 'cyan'))

    try:
        # Try to validate the fallback data with the Pydantic class
        return pydantic_class(**fallback_data)
    except ValidationError as e:
        # Report the failure like the code-block path does, instead of
        # dropping it silently.
        print("Validation error:", e)
        return None