Skip to content

Commit

Permalink
Add tests with corrupted strings
Browse files Browse the repository at this point in the history
  • Loading branch information
robozmey committed Dec 16, 2024
1 parent c0120da commit 7191ae4
Show file tree
Hide file tree
Showing 4 changed files with 1,219 additions and 3 deletions.
52 changes: 52 additions & 0 deletions contrib/try_convert/check_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import re

# Scan a regression diff file and print a condensed summary of every query
# that is immediately followed by a removed ('-') line, provided the query
# mentions one of the type names listed in ``needed_types``.
# NOTE(review): presumably the file is the regression.diffs produced by the
# try_convert regression run -- confirm against the test harness.

regression_path = './regression.diffs'

# Type names that make a query worth reporting.  A query is reported when one
# of its words equals an entry, or when any '_'-separated fragment of a word
# equals an entry (e.g. 'tt_temp_int4' matches 'int4').
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python ('interval' 'regproc' -> 'intervalregproc').
needed_types = [
    'int2',
    'int4',
    'int8',
    'float4',
    'float8',
    'numeric',

    'date',
    'time',
    'timestamp',
    'timetz',
    'timestamptz',
    'interval',

    'regproc',
    'value_day',
    'oid',
    'jsonb',
    'pg_catalog',
    'reg_class',

    # 'text',
    # 'bpchar',
    # 'varchar',
    # 'char'
]

# Words dropped from the summary: SQL keywords, the try_convert helper names,
# the column/table aliases used by the queries, and the empty strings that
# re.split() yields between adjacent separators.
ignored_words = frozenset([
    'select', 'from',
    'try_convert', 'try_convert_by_sql', 'try_convert_by_sql_text',
    'try_convert_by_sql_with_len_out',
    'NULL', 'v', 'v1', 'v2', 'where', 'is', 'not', 'distinct', 'as', 't', '',
])

# Separators between words: '::', '*', ';', newline, space, parentheses,
# ',', '.', and whole double-quoted, single-quoted or <...> spans (greedy).
# Raw string so the regex escapes are not parsed as string escapes.
token_separator = re.compile(r'::|\*|;|\n| |\(|\)|,|\.|".*"|\'.*\'|<.*>')


def summarize_query(query):
    """Return the condensed form of *query*, or None if it is not of interest.

    The query is split on ``token_separator`` and the ignored words are
    dropped.  If any remaining word, or any '_'-separated fragment of it, is
    listed in ``needed_types``, the remaining words are returned joined by
    single spaces; otherwise None is returned.
    """
    words = [word for word in token_separator.split(query)
             if word not in ignored_words]
    for word in words:
        # Check the whole word as well as its fragments, so entries that
        # themselves contain '_' (such as 'value_day') can match.
        if word in needed_types:
            return ' '.join(words)
        if any(part in needed_types for part in word.split('_')):
            return ' '.join(words)
    return None


def find_changed_queries(lines):
    """Return summaries for the lines that directly precede a '-' line.

    A line is examined when it is non-empty, does not itself start with '-',
    and the following line is non-empty and starts with '-'.  Lines whose
    summary is None (no type of interest) are omitted.
    """
    summaries = []
    for previous, current in zip(lines, lines[1:]):
        if not previous or not current:
            continue
        if current[0] != '-' or previous[0] == '-':
            continue
        summary = summarize_query(previous)
        if summary is not None:
            summaries.append(summary)
    return summaries


def main():
    """Read ``regression_path`` and print one summary per line."""
    # 'with' guarantees the file is closed even if reading fails.
    with open(regression_path) as f:
        lines = f.read().split('\n')
    for summary in find_changed_queries(lines):
        print(summary)


if __name__ == '__main__':
    main()

18 changes: 15 additions & 3 deletions contrib/try_convert/generate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,25 +147,32 @@ def remove_empty_lines(t):
### GET TYPE IDs

# TODO Is_have_IO
# DATA(insert OID = 1231 ( _numeric PGNSP PGUID -1 f b A f t \054 0 1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ ));

f = open(pg_type_path)
content = f.read()

type_pattern = r'DATA\(insert OID = (.*) \([\s]*(.*?)[\s]';
type_pattern = r'DATA\(insert OID = (.*) \([\s]+(.*?)[\s]+(.*?\s+){12}(.*?)\s+(.*?)\s+';

type_name_id = {}
type_id_name = {}

supported_types_count = 0

for (id, name) in re.findall(type_pattern, content):
for t in re.findall(type_pattern, content):
id = t[0]
name = t[1]
infunc = t[3]
outfunc = t[4]
# print(len(t), t[3])
if name != '' and name[0] != '_':
id = int(id)
type_id_name[id] = name
type_name_id[name] = id

if name in supported_types:
# print(f'|{name}|✅|')
print(f'{id} {name} {infunc} {outfunc}')
supported_types_count += 1

supported_extension_types_count = 0
Expand All @@ -188,6 +195,8 @@ def remove_empty_lines(t):
supported_cast_count = 0

for (source, target, funcid, _, meth) in re.findall(cast_pattern, content):
if int(source) not in type_id_name or int(target) not in type_id_name:
continue
casts += [(int(source), int(target), meth)]
print(type_id_name[int(source)], ' -> ', type_id_name[int(target)], ' via ', meth, f'({funcid} - {func_id_name[funcid]}) ', f'{source}-{target}')
if type_id_name[int(source)] in supported_types and type_id_name[int(target)] in supported_types:
Expand Down Expand Up @@ -456,11 +465,14 @@ def create_test(source_name, target_name, test_data, default='NULL', source_varl

to_text_in, to_text_out = create_test(type_name, string_type, test_type_table, default_value, type_varlen, string_varlen)
from_text_in, from_text_out = create_test(string_type, type_name, text_type_table, default_value, string_varlen, type_varlen)
# from_corrupted_text_in, from_corrupted_text_out = create_test(string_type, type_name, test_corrupted_text_data, default_value, string_varlen, type_varlen)
from_corrupted_text_in, from_corrupted_text_out = create_test(string_type, type_name, test_corrupted_text_data, default_value, string_varlen, type_varlen)

text_tests_in += [to_text_in, from_text_in]
text_tests_out += [to_text_out, from_text_out]

text_tests_in += [from_corrupted_text_in]
text_tests_out += [from_corrupted_text_out]

# print(text_tests_in[0])
# print(text_tests_in[1])

Expand Down
Loading

0 comments on commit 7191ae4

Please sign in to comment.