Add tests with corrupted strings

open-gpdb · Dec 13, 2024 · 019fd68 · 019fd68
1 parent 99cd7c3
commit 019fd68
Show file tree

Hide file tree

Showing 4 changed files with 1,219 additions and 3 deletions.
diff --git a/contrib/try_convert/check_test.py b/contrib/try_convert/check_test.py
@@ -0,0 +1,52 @@
+import re
+
+regression_path = './regression.diffs'  # resolved relative to the current working directory
+# Read the whole file up front; the context manager closes the handle afterwards.
+with open(regression_path) as f:
+    lines = f.read().split('\n')
+
+needed_types = [  # type names whose mention makes a regression line worth printing
+    'int2',
+    'int4',
+    'int8',
+    'float4',
+    'float8',
+    'numeric',
+    # Date/time types.
+    'date',
+    'time',
+    'timestamp',
+    'timetz',
+    'timestamptz',
+    'interval',
+    # Every entry needs its own comma: adjacent string literals are silently concatenated.
+    'regproc',
+    'value_day',
+    'oid',
+    'jsonb',
+    'pg_catalog',
+    'reg_class',
+    # String types are currently disabled.
+    # 'text',
+    # 'bpchar',
+    # 'varchar',
+    # 'char'
+]
+
+# Walk consecutive line pairs: when a removed ('-') line follows a non-empty line that is
+# not itself removed, tokenize that preceding line and print it if it mentions a needed type.
+for prev, line in zip(lines, lines[1:]):
+    if line.startswith('-') and prev and not prev.startswith('-'):
+        # Split on SQL punctuation and spaces; quoted strings and <...> spans act as separators and are dropped.
+        words = re.split(r'::|\*|;|\n| |\(|\)|,|\.|\".*\"|\'.*\'|<.*>', prev)
+        # Keep only tokens that are not SQL keywords, try_convert helper names, aliases or empty strings.
+        ans = [word for word in words
+               if word not in ('select', 'from',
+                               'try_convert', 'try_convert_by_sql', 'try_convert_by_sql_text', 'try_convert_by_sql_with_len_out',
+                               'NULL', 'v', 'v1', 'v2', 'where', 'is', 'not', 'distinct', 'as', 't', '')]
+        # Check the whole token as well as its '_'-separated parts, so names containing '_' can match too.
+        is_printing = any(word in needed_types or any(w in needed_types for w in word.split('_'))
+                          for word in ans)
+        if is_printing:
+            print(' '.join(ans))
+
diff --git a/contrib/try_convert/generate_test.py b/contrib/try_convert/generate_test.py
@@ -147,25 +147,32 @@ def remove_empty_lines(t):
 ### GET TYPE IDs
 
 # TODO Is_have_IO
+# DATA(insert OID = 1231 (  _numeric	 PGNSP PGUID -1 f b A f t \054 0	1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ ));
 
 f = open(pg_type_path)
 content = f.read()
 
-type_pattern = r'DATA\(insert OID = (.*) \([\s]*(.*?)[\s]';
+type_pattern = r'DATA\(insert OID = (.*) \([\s]+(.*?)[\s]+(.*?\s+){12}(.*?)\s+(.*?)\s+';  # groups: OID, name, last of the 12 skipped columns, then the next two columns
 
 type_name_id = {}
 type_id_name = {}
 
 supported_types_count = 0
 
-for (id, name) in re.findall(type_pattern, content):
+for t in re.findall(type_pattern, content):  # each t is a tuple of the five captured groups
+    id = t[0]
+    name = t[1]
+    infunc = t[3]  # t[2] (the repeated skip group) is not used
+    outfunc = t[4]
+    # Debug aid: print(len(t), t[3])
     if name != '' and name[0] != '_':
         id = int(id)
         type_id_name[id] = name
         type_name_id[name] = id
 
         if name in supported_types:
             # print(f'|{name}|✅|')
+            print(f'{id} {name} {infunc} {outfunc}')
             supported_types_count += 1
 
 supported_extension_types_count = 0
@@ -188,6 +195,8 @@ def remove_empty_lines(t):
 supported_cast_count = 0
 
 for (source, target, funcid, _, meth) in re.findall(cast_pattern, content):
+    if int(source) not in type_id_name or int(target) not in type_id_name:
+        continue
     casts += [(int(source), int(target), meth)]
     print(type_id_name[int(source)], ' -> ', type_id_name[int(target)], ' via ', meth, f'({funcid} - {func_id_name[funcid]}) ', f'{source}-{target}')
     if type_id_name[int(source)] in supported_types and type_id_name[int(target)] in supported_types:
@@ -456,11 +465,14 @@ def create_test(source_name, target_name, test_data, default='NULL', source_varl
 
                 to_text_in, to_text_out = create_test(type_name, string_type, test_type_table, default_value, type_varlen, string_varlen)
                 from_text_in, from_text_out = create_test(string_type, type_name, text_type_table, default_value, string_varlen, type_varlen)
-                # from_corrupted_text_in, from_corrupted_text_out = create_test(string_type, type_name, test_corrupted_text_data, default_value, string_varlen, type_varlen)
+                from_corrupted_text_in, from_corrupted_text_out = create_test(string_type, type_name, test_corrupted_text_data, default_value, string_varlen, type_varlen)
 
                 text_tests_in += [to_text_in, from_text_in]
                 text_tests_out += [to_text_out, from_text_out]
 
+                text_tests_in += [from_corrupted_text_in]
+                text_tests_out += [from_corrupted_text_out]
+
 # print(text_tests_in[0])
 # print(text_tests_in[1])