Skip to content

Commit

Permalink
Add test generation script
Browse files Browse the repository at this point in the history
  • Loading branch information
robozmey committed Nov 28, 2024
1 parent 7d706f8 commit b927428
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 0 deletions.
259 changes: 259 additions & 0 deletions contrib/try_convert/generate_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import re

pg_type_path = '/home/robozmey/gpdb_src/src/include/catalog/pg_type.h'
pg_cast_path = '/home/robozmey/gpdb_src/src/include/catalog/pg_cast.h'


def remove_empty_lines(t):
return "\n".join([s for s in t.split("\n") if s])


### GET TYPE IDs

# TODO Is_have_IO

f = open(pg_type_path)
content = f.read()

type_pattern = r'DATA\(insert OID = (.*) \([\s]*(.*?)[\s]';

type_name_id = {}
type_id_name = {}

for (id, name) in re.findall(type_pattern, content):
if name != '' and name[0] != '_':
id = int(id)
type_id_name[id] = name
type_name_id[name] = id

print('Types found:', len(type_id_name))
# print('\n'.join([str(i) + ' ' + type_id_name[i] for i in sorted(type_id_name)]))


### GET CONVERTS

f = open(pg_cast_path)
content = f.read()

cast_pattern = r'DATA\(insert \([\s]*(\d+)[\s]+(\d+)[\s]+(\d+)[\s]+(.)[\s]+(.)';

casts = []

for (source, target, _, _, meth) in re.findall(cast_pattern, content):
casts += [(int(source), int(target), meth)]

print('Casts found:', len(casts))
# print('\n'.join([str(c[0]) + ' ' + c[1] + ' ' + c[2] for c in casts]))

supported_types = [
'int8', # NUMBERS
'int4',
'int2',
'float8',
'float4',
'numeric',
# 'bit',
# 'bool',
# 'varbit',
# 'date', # TIME
# 'time',
# 'timez',
# 'timestamp',
# 'timestampz',
# 'box', # GEOMENTY
# 'circle',
# 'line',
# 'lseg',
# 'path',
# 'point',
# 'polygon',
# 'cidr', # IP
# 'inet',
# 'json', # JSON
# 'jsonb',
# 'bytea', # STRINGS
# 'char',
# 'varchar',
# 'text',
# 'interval',
# 'macaddr',
# 'macaddr8',
# 'money',
# # 'pg_lsn',
# # 'tsquery',
# # 'tsvector',
# # 'txid_snapshot',
# 'uuid',
# 'xml'
]

print(supported_types)


### HEADER & FOOTER

test_header = open('test_header.sql').read()
test_footer = open('test_footer.sql').read()


### CREATE DATA

import random

random.seed(42)

numbers = {
'int2' : ((-32768, 32767), False),
'int4' : ((-2147483648, 2147483647), False),
'int8' : ((-9223372036854775808, 9223372036854775807), False),
'float4' : ((10**6, 10**6), True),
'float8' : ((10**15, 10**15), True),
'numeric' : ((10**20, 10**20), True),
}


test_create_data = '-- CREATE DATA\n'

for number_type in numbers:

type_range = numbers[number_type][0]
is_float = numbers[number_type][1]

mn = type_range[0]
mx = type_range[1]

nln = 0
ln = len(str(type_range[1]))-1

table_name = f'tt_{number_type}'

test_create_data += f'CREATE TABLE {table_name} (v {number_type}) DISTRIBUTED BY (v);\n'

test_create_data += f'INSERT INTO {table_name} (v) VALUES\n'

values = []

for c in range(nln, ln):
rep = 20 // ln + 1
for _ in range(rep):
n = random.random()
if c >= 0:
n *= (10 ** (c+1))
else:
n /= (10 ** (-c))
if not is_float:
n = int(n)

values += [f'({n})']

test_create_data += ',\n'.join(values) + ';'

##### COPY real_city FROM '@abs_srcdir@/data/real_city.data';

## GET DATA

data_sources = {
'json' : '@abs_srcdir@/data/jsonb.data'
}

def get_data(type_name):
return f'tt_{type_name}'



## TEST

def create_test(source_name, target_name, test_data):

query = f'select test_cast({source_name}, {target_name}, {test_data});'
query = \
f'select count(*) from (' \
f'select (' \
f'try_convert(v, NULL::{target_name}) ' \
f'is not distinct from '\
f'try_convert_by_sql(v::text, NULL::{target_name}, \'{source_name}\'::text)' \
f') from {test_data}' \
f') as t(eq) where not eq;'
result = \
' count \n' \
'-------\n' \
' 0\n' \
'(1 row)\n' \

input_source = query
output_source = remove_empty_lines(query) + '\n' + result

return input_source, output_source


### CAST to & from text

text_tests_in = []
text_tests_out = []

for type_name in supported_types:
test_type_data = get_data(type_name)
test_text_data = f'(select v::text from {test_type_data}) as t(v)'

to_text_in, to_text_out = create_test(type_name, 'text', test_type_data)
from_text_in, from_text_out = create_test('text', type_name, test_text_data)

text_tests_in += [to_text_in, from_text_in]
text_tests_out += [to_text_out, from_text_out]

print(text_tests_in[0])
print(text_tests_in[1])


### CAST from pg_cast

function_tests_in = []
function_tests_out = []

for (source_id, target_id, method) in casts:

source_name = type_id_name[source_id]
target_name = type_id_name[target_id]
test_data = get_data(source_name)

if (source_name not in supported_types or target_name not in supported_types):
continue

test_in, test_out = create_test(source_name, target_name, test_data)

function_tests_in += [test_in]
function_tests_out += [test_out]

print(function_tests_in[0])


### CONSTRUCT TEST

test_str = '\n'.join([
test_header, \
# CREATE DATA
test_create_data, \
'-- TEXT TESTS', \
'\n'.join(text_tests_in), \
'-- FUNCTION TESTS', \
'\n'.join(function_tests_in), \
test_footer
]) + '\n'

test_f = open('sql/try_convert.sql', 'w')
test_f.write(test_str)


test_str = '\n'.join([
remove_empty_lines(test_header), \
# CREATE DATA
remove_empty_lines(test_create_data), \
'-- TEXT TESTS', \
'\n'.join(text_tests_out), \
'-- FUNCTION TESTS', \
'\n'.join(function_tests_out), \
remove_empty_lines(test_footer)
]) + '\n'

test_f = open('expected/try_convert.out', 'w')
test_f.write(test_str)
3 changes: 3 additions & 0 deletions contrib/try_convert/test_footer.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- FOOTER

reset search_path;
52 changes: 52 additions & 0 deletions contrib/try_convert/test_header.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-- SCRIPT-GENERATED TEST for TRY_CONVERT

create schema tryconvert;
set search_path = tryconvert;

-- start_ignore
CREATE EXTENSION IF NOT EXISTS try_convert;
-- end_ignore

-- SQL try_convert to compare with C one
CREATE OR REPLACE FUNCTION try_convert_by_sql(_in text, INOUT _out ANYELEMENT, source_type text default 'text')
LANGUAGE plpgsql AS
$func$
BEGIN
EXECUTE format('SELECT (%L::%s)::%s', $1, source_type, pg_typeof(_out))
INTO _out;
EXCEPTION WHEN others THEN
-- do nothing: _out already carries default
END
$func$;
-- corrupts string
CREATE OR REPLACE FUNCTION corrupt(value text) RETURNS text
LANGUAGE SQL AS 'select ''!@#$%^&*'' || value || ''!@#$%^&*''';
-- corrupts by random
CREATE OR REPLACE FUNCTION corrupt_random(value text, r float default 0.5) RETURNS text
LANGUAGE SQL AS 'select case when random() < r then corrupt(value) else value end';
-- r
CREATE OR REPLACE FUNCTION test_cast_random(_source regtype, _target regtype, n int default 20, k int default 1000, OUT _out int)
LANGUAGE plpgsql AS
$func$
BEGIN
EXECUTE
'select count(*) from (
select (try_convert(v, NULL::' || _target || ') is not distinct from try_convert_by_sql(v::text, NULL::' || _target || ', ''' || _source || '''::text))
from (select (random()*' || k || ')::' || _source || ' from generate_series(1, ' || n || ')) as g(v)
) as t(eq) where not eq;'
INTO _out;
END
$func$;
-- test cast
CREATE OR REPLACE FUNCTION test_cast(_source regtype, _target regtype, n int default 20, k int default 1000, OUT _out int)
LANGUAGE plpgsql AS
$func$
BEGIN
EXECUTE
'select count(*) from (
select (try_convert(v, NULL::' || _target || ') is not distinct from try_convert_by_sql(v::text, NULL::' || _target || ', ''' || _source || '''::text))
from (select (random()*' || k || ')::' || _source || ' from generate_series(1, ' || n || ')) as g(v)
) as t(eq) where not eq;'
INTO _out;
END
$func$;

0 comments on commit b927428

Please sign in to comment.