Skip to content

Commit

Permalink
Add support for collating sequences on indexed files
Browse files Browse the repository at this point in the history
  • Loading branch information
ddeclerck committed Jan 26, 2024
1 parent 824f2a6 commit d6f4c29
Show file tree
Hide file tree
Showing 14 changed files with 439 additions and 55 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ NEWS - user visible changes -*- outline -*-
build system do not correctly work together to locate files from
diagnostic output

** New option -fdefault-file-colseq to specify the default collating sequence
used for files that have no COLLATING SEQUENCE clause

* More notable changes

** execution times were significantly reduced for the following:
Expand Down
11 changes: 11 additions & 0 deletions cobc/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@

2024-01-25 David Declerck <[email protected]>

FR #459: support COLLATING SEQUENCE clause on SELECT / INDEXED files
* codegen.c (output_file_initialization): output the indexed
file collating sequence (was already present in the AST)
* parser.y (collating_sequence_clause): replace the CB_PENDING
warning on file collating sequence with CB_UNFINISHED
* flag.def, tree.h, cobc.c, parser.y: add and handle a new
-fdefault-file-colseq flag to specify the default collating
sequence to use for files without a collating sequence clause

2023-11-29 Fabrice Le Fessant <[email protected]>

* cobc.c (cobc_clean_up): when save-temps specifies a directory,
Expand Down
42 changes: 25 additions & 17 deletions cobc/cobc.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,22 +90,23 @@ enum compile_level {
CB_LEVEL_EXECUTABLE = 7
};

#define CB_FLAG_GETOPT_STACK_SIZE 1
#define CB_FLAG_GETOPT_IF_CUTOFF 2
#define CB_FLAG_GETOPT_SIGN 3
#define CB_FLAG_GETOPT_FOLD_COPY 4
#define CB_FLAG_GETOPT_FOLD_CALL 5
#define CB_FLAG_GETOPT_TTITLE 6
#define CB_FLAG_GETOPT_MAX_ERRORS 7
#define CB_FLAG_GETOPT_DUMP 8
#define CB_FLAG_GETOPT_CALLFH 9
#define CB_FLAG_GETOPT_INTRINSICS 10
#define CB_FLAG_GETOPT_EC 11
#define CB_FLAG_GETOPT_NO_EC 12
#define CB_FLAG_GETOPT_NO_DUMP 13
#define CB_FLAG_GETOPT_EBCDIC_TABLE 14
#define CB_FLAG_GETOPT_DEFAULT_COLSEQ 15
#define CB_FLAG_MEMORY_CHECK 16
#define CB_FLAG_GETOPT_STACK_SIZE 1
#define CB_FLAG_GETOPT_IF_CUTOFF 2
#define CB_FLAG_GETOPT_SIGN 3
#define CB_FLAG_GETOPT_FOLD_COPY 4
#define CB_FLAG_GETOPT_FOLD_CALL 5
#define CB_FLAG_GETOPT_TTITLE 6
#define CB_FLAG_GETOPT_MAX_ERRORS 7
#define CB_FLAG_GETOPT_DUMP 8
#define CB_FLAG_GETOPT_CALLFH 9
#define CB_FLAG_GETOPT_INTRINSICS 10
#define CB_FLAG_GETOPT_EC 11
#define CB_FLAG_GETOPT_NO_EC 12
#define CB_FLAG_GETOPT_NO_DUMP 13
#define CB_FLAG_GETOPT_EBCDIC_TABLE 14
#define CB_FLAG_GETOPT_DEFAULT_COLSEQ 15
#define CB_FLAG_GETOPT_DEFAULT_FILE_COLSEQ 16
#define CB_FLAG_MEMORY_CHECK 17


/* Info display limits */
Expand Down Expand Up @@ -3813,6 +3814,13 @@ process_command_line (const int argc, char **argv)
}
break;

case CB_FLAG_GETOPT_DEFAULT_FILE_COLSEQ: /* 16 */
/* -fdefault-file-colseq=<ASCII/EBCDIC/NATIVE> */
if (cb_deciph_default_file_colseq_name (cob_optarg)) {
cobc_err_exit (COBC_INV_PAR, "-fdefault-file-colseq");
}
break;

case CB_FLAG_GETOPT_FOLD_COPY: /* 4 */
/* -ffold-copy=<UPPER/LOWER> : COPY fold case */
if (!cb_strcasecmp (cob_optarg, "UPPER")) {
Expand Down Expand Up @@ -3892,7 +3900,7 @@ process_command_line (const int argc, char **argv)
}
break;

case CB_FLAG_MEMORY_CHECK: /* 16 */
case CB_FLAG_MEMORY_CHECK: /* 17 */
/* -fmemory-check=<scope> : */
if (!cob_optarg) {
cb_flag_memory_check = CB_MEMCHK_ALL;
Expand Down
28 changes: 28 additions & 0 deletions cobc/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -9320,6 +9320,20 @@ output_file_initialization (struct cb_file *f)
} else {
output_line ("%s%s->offset = 0;", CB_PREFIX_KEYS, f->cname);
}
if (f->organization == COB_ORG_INDEXED) {
/* TODO: handle record keys */
cb_tree col = f->collating_sequence;
output_prefix ();
output ("%s%s->collating_sequence = ",
CB_PREFIX_KEYS, f->cname);
if ((col != NULL) && CB_REFERENCE_P (col)) {
output_param (cb_ref(col), -1);
output (";");
} else {
output ("NULL;");
}
output_newline ();
}
nkeys = 1;
for (l = f->alt_key_list; l; l = l->next) {
output_prefix ();
Expand All @@ -9342,6 +9356,20 @@ output_file_initialization (struct cb_file *f)
f->cname, nkeys);
output_key_components (f, l->component_list, nkeys);
}
if (f->organization == COB_ORG_INDEXED) {
/* TODO: handle record keys */
cb_tree col = f->collating_sequence;
output_prefix ();
output ("(%s%s + %d)->collating_sequence = ",
CB_PREFIX_KEYS, f->cname, nkeys);
if ((col != NULL) && CB_REFERENCE_P (col)) {
output_param (cb_ref(col), -1);
output (";");
} else {
output ("NULL;");
}
output_newline ();
}
nkeys++;
}
#if 0 /* now done in cob_file_malloc / cob_file_external_addr */
Expand Down
4 changes: 4 additions & 0 deletions cobc/flag.def
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ CB_FLAG_NQ (1, "default-colseq", CB_FLAG_GETOPT_DEFAULT_COLSEQ,
_(" -fdefault-colseq=[ASCII|EBCDIC|NATIVE]\tdefine default collating sequence\n"
" * default: NATIVE"))

CB_FLAG_NQ (1, "default-file-colseq", CB_FLAG_GETOPT_DEFAULT_FILE_COLSEQ,
_(" -fdefault-file-colseq=[ASCII|EBCDIC|NATIVE]\tdefine default file collating sequence\n"
" * default: NATIVE"))

/* Binary flags */

/* Flags with suppressed help */
Expand Down
43 changes: 16 additions & 27 deletions cobc/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -336,29 +336,6 @@ check_non_area_a (cb_tree stmt) {

/* Collating sequences */

/* Known collating sequences/alphabets;
   the numeric values are the ones the compiler assigns implicitly,
   spelled out here for readability */
enum cb_colseq {
	CB_COLSEQ_NATIVE = 0,
	CB_COLSEQ_ASCII = 1,
	CB_COLSEQ_EBCDIC = 2
};
enum cb_colseq cb_default_colseq = CB_COLSEQ_NATIVE;

/* Decipher the name of the default collating sequence:
   NAME is matched case-insensitively against ASCII, EBCDIC and NATIVE;
   on a match cb_default_colseq is set and 0 is returned,
   otherwise cb_default_colseq is left untouched and 1 is returned */
int cb_deciph_default_colseq_name (const char * const name)
{
	if (cb_strcasecmp (name, "NATIVE") == 0) {
		cb_default_colseq = CB_COLSEQ_NATIVE;
		return 0;
	}
	if (cb_strcasecmp (name, "EBCDIC") == 0) {
		cb_default_colseq = CB_COLSEQ_EBCDIC;
		return 0;
	}
	if (cb_strcasecmp (name, "ASCII") == 0) {
		cb_default_colseq = CB_COLSEQ_ASCII;
		return 0;
	}
	/* unknown name */
	return 1;
}

static cb_tree
build_colseq_tree (const char *alphabet_name,
int alphabet_type,
Expand Down Expand Up @@ -901,23 +878,34 @@ check_relaxed_syntax (const cob_flags_t lev)
}

static void
setup_default_collation (struct cb_program *program) {
switch (cb_default_colseq) {
prepare_default_collation (enum cb_colseq colseq) {
switch (colseq) {
#ifdef COB_EBCDIC_MACHINE
case CB_COLSEQ_ASCII:
#else
case CB_COLSEQ_EBCDIC:
#endif
alphanumeric_collation = build_colseq (cb_default_colseq);
alphanumeric_collation = build_colseq (colseq);
break;
default:
alphanumeric_collation = NULL;
}
national_collation = NULL; /* TODO: default national collation */
}

/* Set up the collating sequences of PROGRAM from the global default
   cb_default_colseq: prepare_default_collation fills the file-level
   variables alphanumeric_collation / national_collation, which are then
   stored as the program's alphanumeric and national collating sequences */
static void
setup_default_collation (struct cb_program *program) {
prepare_default_collation (cb_default_colseq);
program->collating_sequence = alphanumeric_collation;
program->collating_sequence_n = national_collation;
}

static void
setup_default_file_collation (struct cb_file *file) {
prepare_default_collation (cb_default_file_colseq);
file->collating_sequence = alphanumeric_collation;
}

static void
program_init_without_program_id (void)
{
Expand Down Expand Up @@ -5365,6 +5353,7 @@ file_control_entry:

}
key_type = NO_KEY;
setup_default_file_collation (current_file);
}
_select_clauses_or_error
{
Expand Down Expand Up @@ -5752,7 +5741,7 @@ collating_sequence_clause:
check_repeated ("COLLATING", SYN_CLAUSE_3, &check_duplicate);
current_file->collating_sequence = alphanumeric_collation;
current_file->collating_sequence_n = national_collation;
CB_PENDING ("FILE COLLATING SEQUENCE");
CB_UNFINISHED ("FILE COLLATING SEQUENCE");
}
;

Expand Down
32 changes: 32 additions & 0 deletions cobc/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -7414,6 +7414,38 @@ cb_build_ml_suppress_checks (struct cb_ml_generate_tree *tree)
}


enum cb_colseq cb_default_colseq = CB_COLSEQ_NATIVE;
enum cb_colseq cb_default_file_colseq = CB_COLSEQ_NATIVE;

/* Decipher character conversion table names */
static int
cb_deciph_colseq_name (const char * const name, enum cb_colseq *colseq)
{
if (!cb_strcasecmp (name, "ASCII")) {
*colseq = CB_COLSEQ_ASCII;
} else if (!cb_strcasecmp (name, "EBCDIC")) {
*colseq = CB_COLSEQ_EBCDIC;
} else if (!cb_strcasecmp (name, "NATIVE")) {
*colseq = CB_COLSEQ_NATIVE;
} else {
return 1;
}
return 0;
}

/* Parse NAME (ASCII / EBCDIC / NATIVE, case-insensitive) into the global
   cb_default_colseq; returns 0 on success, 1 if NAME is not recognized
   (in which case cb_default_colseq is unchanged) */
int
cb_deciph_default_colseq_name (const char * const name)
{
return cb_deciph_colseq_name (name, &cb_default_colseq);
}

/* Parse NAME (ASCII / EBCDIC / NATIVE, case-insensitive) into the global
   cb_default_file_colseq; returns 0 on success, 1 if NAME is not recognized
   (in which case cb_default_file_colseq is unchanged);
   used by cobc for the argument of -fdefault-file-colseq */
int
cb_deciph_default_file_colseq_name (const char * const name)
{
return cb_deciph_colseq_name (name, &cb_default_file_colseq);
}


#ifndef HAVE_DESIGNATED_INITS
void
cobc_init_tree (void)
Expand Down
13 changes: 13 additions & 0 deletions cobc/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2343,6 +2343,7 @@ extern cb_tree cb_debug_sub_3;
extern cb_tree cb_debug_contents;

extern int cb_deciph_default_colseq_name (const char *const);
extern int cb_deciph_default_file_colseq_name (const char *const);

extern struct cb_program *cb_build_program (struct cb_program *,
const int);
Expand Down Expand Up @@ -2740,5 +2741,17 @@ extern int cobc_has_areacheck_directive (const char *directive);
#define CB_CHAIN_PAIR(x,y,z) x = cb_pair_add (x, y, z)
#define CB_FIELD_ADD(x,y) x = cb_field_add (x, y)

/* Known collating sequences/alphabets that can be selected as a default */
enum cb_colseq {
	CB_COLSEQ_NATIVE,
	CB_COLSEQ_ASCII,
	CB_COLSEQ_EBCDIC
};

/* Default collating sequences (defined in tree.c, initially NATIVE):
   cb_default_colseq is applied to programs,
   cb_default_file_colseq is applied to files */
extern enum cb_colseq cb_default_colseq;
extern enum cb_colseq cb_default_file_colseq;

/* Note: cb_deciph_default_colseq_name and cb_deciph_default_file_colseq_name
   are declared earlier in this header; they are not declared a second
   time here to avoid redundant declarations */


#endif /* CB_TREE_H */
10 changes: 10 additions & 0 deletions libcob/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@

2024-01-25 David Declerck <[email protected]>

FR #459: support COLLATING SEQUENCE clause on SELECT / INDEXED files
* fileio.c (bdb_setkeycol, bdb_bt_compare, indexed_open, ...):
take the file collating sequence into account when comparing keys
* common.h, common.c, coblocal.h: rename common_cmps to cob_cmps and
make it available locally
* common.h, common.c, fileio.c: split the sort_collating field to
distinguish actual collating sequence and code-set for writing
(new field for the latter: code_set_write, to match code_set_read)

2023-12-14 David Declerck <[email protected]>

* common.c (cob_terminate_routines, cob_call_with_exception_check):
Expand Down
3 changes: 3 additions & 0 deletions libcob/coblocal.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,9 @@ cob_max_int (const int x, const int y)
return y;
}

COB_HIDDEN int cob_cmps (const unsigned char *, const unsigned char *,
const size_t, const unsigned char *);

#undef COB_HIDDEN

#endif /* COB_LOCAL_H */
12 changes: 6 additions & 6 deletions libcob/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1822,8 +1822,8 @@ common_cmpc (const unsigned char *p, const unsigned int c,

/* compare up to 'size' characters in 's1' to 's2'
using collation 'col' */
static int
common_cmps (const unsigned char *s1, const unsigned char *s2,
int
cob_cmps (const unsigned char *s1, const unsigned char *s2,
const size_t size, const unsigned char *col)
{
register const unsigned char *end = s1 + size;
Expand Down Expand Up @@ -1943,15 +1943,15 @@ cob_cmp_all (cob_field *f1, cob_field *f2)
const size_t chunk_size = size2;
size_t size_loop = size1;
while (size_loop >= chunk_size) {
if ((ret = common_cmps (data1, data2, chunk_size, col)) != 0) {
if ((ret = cob_cmps (data1, data2, chunk_size, col)) != 0) {
break;
}
size_loop -= chunk_size;
data1 += chunk_size;
}
if (!ret
&& size1 > 0) {
ret = common_cmps (data1, data2, size_loop, col);
ret = cob_cmps (data1, data2, size_loop, col);
}
}

Expand Down Expand Up @@ -1991,7 +1991,7 @@ cob_cmp_alnum (cob_field *f1, cob_field *f2)
} else { /* check with collation */

/* Compare common substring */
if ((ret = common_cmps (data1, data2, min, col)) != 0) {
if ((ret = cob_cmps (data1, data2, min, col)) != 0) {
return ret;
}

Expand Down Expand Up @@ -2052,7 +2052,7 @@ sort_compare_collate (const void *data1, const void *data2)
if (COB_FIELD_IS_NUMERIC (&f1)) {
res = cob_numeric_cmp (&f1, &f2);
} else {
res = common_cmps (f1.data, f2.data, f1.size, sort_collate);
res = cob_cmps (f1.data, f2.data, f1.size, sort_collate);
}
if (res != 0) {
return (sort_keys[i].flag == COB_ASCENDING) ? res : -res;
Expand Down
4 changes: 1 addition & 3 deletions libcob/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1370,9 +1370,7 @@ typedef struct __cob_file_key {
unsigned int offset; /* Offset of field */
int count_components; /* 0..1::simple-key 2..n::split-key */
cob_field *component[COB_MAX_KEYCOMP]; /* key-components iff split-key */
#if 0 /* TODO (for file keys, not for SORT/MERGE) */
const unsigned char *collating_sequence; /* COLLATING */
#endif
const unsigned char *collating_sequence; /* COLLATING (for file keys, not for SORT/MERGE) */
} cob_file_key;


Expand Down
Loading

0 comments on commit d6f4c29

Please sign in to comment.