-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathsexp.h
784 lines (697 loc) · 29 KB
/
sexp.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
/**
@cond IGNORE
======================================================
SFSEXP: Small, Fast S-Expression Library
Written by Matthew Sottile ([email protected])
======================================================
Copyright (2003-2006). The Regents of the University of California. This
material was produced under U.S. Government contract W-7405-ENG-36 for Los
Alamos National Laboratory, which is operated by the University of
California for the U.S. Department of Energy. The U.S. Government has rights
to use, reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR
THE UNIVERSITY MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY
LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified to produce
derivative works, such modified software should be clearly marked, so as not
to confuse it with the version available from LANL.
Additionally, this library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation; either version 2.1 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, U SA
LA-CC-04-094
@endcond
**/
#ifndef __SEXP_H__
#define __SEXP_H__
#include <stddef.h>
#include <stdio.h> /* for BUFSIZ only */
#include "faststack.h"
#include "cstring.h"
#include "sexp_memory.h"
#include "sexp_errors.h"
/* doxygen documentation groups defined here */
/**
* \defgroup IO I/O routines
*/
/**
* \defgroup parser Parser routines
*/
/**
* \mainpage A small and quick S-expression parsing library.
*
* \section intro Introduction
*
* This library was created to provide s-expression parsing and manipulation
* facilities to C and C++ programs. The primary goals were speed and
* efficiency - low memory impact, and the highest speed we could achieve in
* parsing. Surprisingly, no other libraries on the net were found that were
* not bloated with features or involved embedding a full-fledged LISP or
* Scheme interpreter into our programs. So, this library evolved to fill
* this gap. As such, it does not guarantee that every valid LISP
* expression is parseable, and many features that are not required aren't
* implemented. See Rivest's S-expression library for an example of a much
* more featureful library.
*
* What features does this library include? At the heart of the code is a
* continuation-based parser implementing a basic parser state machine.
* Continuations allow users to accumulate multiple streams of characters,
* and parse each stream simultaneously. A continuation allows the parser
* to stop midstream, start working on a new expression, and return to the
* first without corruption of complex state management in the users code.
* No threads, no forking, nothing more than a data structure that must be
* passed in and captured as data becomes available to parse. Once an
* expression has been parsed, a simple structure is returned that
* represents the "abstract syntax tree" of the parsed expression. For the
* majority of users, the parser and this data structure will be all that
* they will ever need to see. For convenience reasons, other functions
* such as I/O wrappers and AST traversal routines have been included, but
* they are not required if users don't wish to use them.
*
* \section credits Credits
*
* SFSEXP: Small, Fast S-Expression Library version \n
* Written by Matthew Sottile ([email protected])
*
* \section license License Information
*
* Copyright (2003-2006). The Regents of the University of California. This
* material was produced under U.S. Government contract W-7405-ENG-36 for Los
* Alamos National Laboratory, which is operated by the University of
* California for the U.S. Department of Energy. The U.S. Government has rights
* to use, reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR
* THE UNIVERSITY MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY
* LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified to produce
* derivative works, such modified software should be clearly marked, so as not
* to confuse it with the version available from LANL.
*
* Additionally, this library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, U SA
*
* LA-CC-04-094
*
*/
/**
* \file sexp.h
*
* \brief API for a small, fast and portable s-expression parser library.
*/
/*==============*/
/* ENUMERATIONS */
/*==============*/
/**
* An element in an s-expression can be one of three types: a <i>value</i>
* represents an atom with an associated text value. A <i>list</i>
* represents an s-expression, and the element contains a pointer to
* the head element of the associated s-expression.
*/
typedef enum {
/**
* An atom of some type. See atom type (aty) field of element structure
* for details as to which atom type this is.
*/
SEXP_VALUE,
/**
* A list. This means the element points to an element representing the
* head of a list.
*/
SEXP_LIST
} elt_t;
/**
* For an element that represents a value, the value can be interpreted
* as a more specific type. A <i>basic</i> value is a simple string with
* no whitespace (and therefore no quotes required). A <i>double quote</i>
* value, or <i>dquote</i>, is one that contains characters (such as
* whitespace) that requires quotation marks to contain the string. A
* <i>single quote</i> value, or <i>squote</i>, represents an element that is
* prefaced with a single tick-mark. This can be either an atom or
* s-expression, and the result is that the parser does not attempt to parse
* the element following the tick mark. It is simply stored as text. This
* is similar to the meaning of a tick mark in the Scheme or LISP family
* of programming languages. Finally, <i>binary</I> allows raw binary to
* be stored within an atom. Note that if the binary type is used, the data
* is stored in bindata with the length in binlength. Otherwise, the data
* us stored in the val field with val_used and val_allocated tracking the
* size of the value string and the total memory allocated for it.
*/
typedef enum {
/**
* Basic, unquoted value.
*/
SEXP_BASIC,
/**
* Single quote (tick-mark) value - contains a string representing
* a non-parsed portion of the s-expression.
*/
SEXP_SQUOTE,
/**
* Double-quoted string. Similar to a basic value, but potentially
* containing white-space.
*/
SEXP_DQUOTE,
/**
* Binary data. This is used when the specialized parser is active
* and supports inlining of binary blobs of data inside an expression.
*/
SEXP_BINARY
} atom_t;
/*============*/
/* STRUCTURES */
/*============*/
/**
* An s-expression is represented as a linked structure of elements,
* where each element is either an <I>atom</I> or <I>list</I>. An
* atom corresponds to a string, while a list corresponds to an
* s-expression. The following grammar represents our definition of
* an s-expression:<P>
*
* <pre>
* sexpr ::= ( sx )
* sx ::= atom sxtail | sexpr sxtail | 'sexpr sxtail | 'atom sxtail | NULL
* sxtail ::= sx | NULL
* atom ::= quoted | value
* quoted ::= "ws_string"
* value ::= nws_string
* </pre>
* <P>
*
* An atom can either be a quoted string, which is a string containing
* whitespace (possibly) surrounded by double quotes, or a non-whitespace
* string that does not require surrounding quotes. An element representing
* an atom will have a type of <i>value</i> and data stored in the <i>val</i>
* field. An element of type <i>list</i> represents an s-expression
* corresponding to <i>sexpr</i> in the grammar, and will have a pointer to
* the head of the appropriate s-expression. Details regarding these fields
* and their values given with the fields themselves. Notice that a single
* quote can appear directly before an s-expression or atom, similar to the
* use in LISP.
*/
typedef struct elt {
/**
* The element has a type that determines how the structure is used.
* If the type is <B>SEXP_VALUE</B>, then a programmer knows that
* either the val field or bindata field is meaningful depending on
* the value of the aty field, and contains the data associated with
* this element of the s-expression. If the type is
* <B>SEXP_LIST</B>, then the list field contains a pointer to the
* s-expression element representing the head of the list. For each
* case, the field for the opposite case contains no meaningful data
* and using them in any way is likely to cause an error.
*/
elt_t ty;
/**
* If the type of the element is <B>SEXP_VALUE</B> and the aty field
* is not <B>SEXP_BINARY</B>, this field will contain the actual data
* represented by this element.
*/
char *val;
/**
* Number of bytes allocated for val.
*/
size_t val_allocated;
/**
* Number of bytes used in val (<= val_allocated).
*/
size_t val_used;
/**
* If the type of the element is <B>SEXP_LIST</B>, this field will contain
* a pointer to the head element of the list.
*/
struct elt *list;
/**
* The <I>next</I> field is a pointer to the next element in the current
* expression. If this element is the last element in the s-expression,
* this field will be <I>NULL</I>.
*/
struct elt *next;
/**
* For elements that represent <I>values</I>, this field will specify the
* specific type of value that it represents. This can be used by
* functions to determine how this value should be printed (ie: how it
* should be quoted) or interpreted (ie: interpreting s-expressions that
* are prefixed with a tick-mark.). This value also indicates whether or
* not the programmer should look in the val field or bindata field for
* the atom data.
*/
atom_t aty;
/**
* For elements that represent <i>binary</I> blobs, this field will
* point to a memory location where the data resides. The length
* of this memory blob is the next field. char* implies byte sized
* elements. This is only used in INLINE_BINARY parser mode.
* <B>IMPORTANT NOTE</B>: The data in this field is freed on a
* destroy_sexp() call, so users should copy it to memory they allocate
* if they wish it to persist after the sexp_t has been freed.
*/
char *bindata;
/**
* The length of the data pointed at by bindata in bytes.
*/
size_t binlength;
} sexp_t;
/**
* parser mode flag used by continuation to toggle special parser
* behaviour.
*/
typedef enum {
/**
* normal (LISP-style) s-expression parser behaviour.
*/
PARSER_NORMAL,
/**
* treat atoms beginning with \#b\# as inlined binary data. everything
* else is treated the same as in PARSER_NORMAL mode.
*/
PARSER_INLINE_BINARY,
/**
* if the event_handlers field in the continuation contains a non-null
* value, the handlers specified in the parser_event_handlers_t struct
* will be called as appropriate, but the parser will not allocate a
* structure composed of sexp_t structs. Note that if the event_handlers
* is set to null and this mode is selected, the user would be better off
* not calling anything in the first place, as they are telling the parser
* to walk the string, but do nothing productive in the process.
*/
PARSER_EVENTS_ONLY
} parsermode_t;
/**
* Some users would prefer to, instead of parsing a full string and walking
* a potentially huge sexp_t structure, use an XML SAX-style parser where
* events are triggered as certain parts of the s-expression are encountered.
* This structure contains a set of function pointers that are called by
* the parser as it hits expression start and end, and completes reading
* atoms and binary data. NOTE: The parser_event_handler struct that is
* a field in the continuation data structure is NOT freed by
* destroy_continuation since structs for callbacks are ALWAYS malloc'd
* by the user, not the library.
*/
typedef struct parser_event_handlers {
/**
* The start_sexpr function pointer is called when an open parenthesis
* is encountered starting an expression.
*/
void (* start_sexpr)();
/**
* The end_sexpr function pointer is called when an close parenthesis
* is encountered ending an expression.
*/
void (* end_sexpr)();
/**
* The characters function pointer is called when an atom is completely
* parsed. The function must take three arguments: a pointer to the
* atom data, the number of elements the atom contains, and the
* specific type of atom that the data represents.
*/
void (* characters)(const char *data, size_t len, atom_t aty);
/**
* The binary function pointer is called when the parser is functioning
* in INLINE_BINARY mode and binary data is encountered. The function
* must take two arguments: a pointer to the beginning of the binary data
* and the number of bytes of data present.
*/
void (* binary)(const char *data, size_t len);
} parser_event_handlers_t;
/**
* A continuation is used by the parser to save and restore state between
* invocations to support partial parsing of strings. For example, if we
* pass the string "(foo bar)(goo car)" to the parser, we want to be able
* to retrieve each s-expression one at a time - it would be difficult to
* return all s-expressions at once without knowing how many there are in
* advance (this would require more memory management than we want...).
* So, by using a continuation-based parser, we can call it with this string
* and have it return a continuation when it has parsed the first
* s-expression. Once we have processed the s-expression (accessible
* through the <i>last_sexpr</i> field of the continuation), we can call
* the parser again with the same string and continuation, and it will be
* able to pick up where it left off.<P>
*
* We use continuations instead of a state-ful parser to allow multiple
* concurrent strings to be parsed by simply maintaining a set of
* continuations. Manipulating continuations by hand is required if the
* continuation-based parser is called directly. This is <b>not
* recommended</b> unless you are willing to deal with potential errors and
* are willing to learn exactly how the continuation relates to the
* internals of the parser. A simpler approach is to use either the
* <i>parse_sexp</i> function that simply returns an s-expression without
* exposing the continuations, or the <i>iparse_sexp</i> function that
* allows iteratively popping one s-expression at a time from a string
* containing one or more s-expressions. Refer to the documentation for
* each parsing function for further details on behavior and usage.
*/
typedef struct pcont {
/**
* The parser stack used for iterative parsing.
*/
faststack_t *stack;
/**
* The last full s-expression encountered by the parser. If this is
* NULL, the parser has not encountered a full s-expression and more
* data is required for the current s-expression being parsed. If this
* is non-NULL, then the parser has encountered one s-expression and may
* be partially through parsing the next s-expression.
*/
sexp_t *last_sexp;
/**
* Pointer to a temporary buffer used to store atom values during parsing.
*/
char *val;
/**
* Current number of bytes allocated for val.
*/
size_t val_allocated;
/**
* Current number of used bytes in val.
*/
size_t val_used;
/**
* Pointer to the character following the last character in the current
* atom value being parsed.
*/
char *vcur;
/**
* Pointer to the last character to examine in the string being parsed.
* When the parser is called with the continuation, this is the first
* character that will be processed. If this is NULL, the parser will
* start parsing at the beginning of the string passed into the parser.
*/
char *lastPos;
/**
* This is a pointer to the beginning of the current string being
* processed. lastPos is a pointer to some value inside the string
* that this points to.
*/
char *sbuffer;
/**
* This is the depth of parenthesis (the number of left parens encountered)
* that the parser is currently working with.
*/
unsigned int depth;
/**
* This is the depth of parenthesis encountered after a single quote (tick)
* if the character immediately following the tick was a left paren.
*/
unsigned int qdepth;
/**
* This is the state ID of the current state of the parser in the
* DFA representing the parser. The current parser is a DFA based parser
* to simplify restoring the proper state from a continuation.
*/
unsigned int state;
/**
* This is a flag indicating whether the next character to be processed
* should be assumed to have been prefaced with a '\' character to escape
* it.
*/
unsigned int esc;
/**
* Flag whether or not we are processing an atom that was preceded by
* a single quote.
*/
unsigned int squoted;
/**
* Error code. Used to indicate that the continuation being returned does
* not represent a successful parsing and thus the contents aren't of much
* value.
*/
sexp_errcode_t error;
/**
* Mode. The parsers' specialized behaviours can be activated by
* tweaking the mode setting. There are currently two available:
* normal and inline_binary. Inline_binary treats atoms that start
* with \#b\# specially, assuming that they have the structure:
*
* \#b\#s\#data
*
* Where s is a positive (greater than 0) integer representing the length
* of the data, and data is s bytes of binary data following the \#
* sign. After the s bytes, it is assumed normal s-expression data
* continues.
*/
parsermode_t mode;
/* -----------------------------------------------------------------
* These fields below are related to dealing with INLINE_BINARY mode
* ----------------------------------------------------------------- */
/**
* Length to expect of the current binary data being read in.
* this also corresponds to the size of the memory allocated for
* reading this binary data into.
*/
size_t binexpected;
/**
* Number of bytes of the binary blob that have already been read in.
*/
size_t binread;
/**
* Pointer to the memory containing the binary data being read in.
*/
char *bindata;
/**
* Pointer to a structure holding handlers for sexpr events. NULL for
* normal parser operation. This field is NOT freed by
* destroy_continuation and must be free'd by the user. This is because
* these are malloc'd outside the library ALWAYS, so they are the user's
* responsibility.
*/
parser_event_handlers_t *event_handlers;
} pcont_t;
/**
* \ingroup IO
* This structure is a wrapper around a standard I/O file descriptor and
* the parsing infrastructure (continuation and a buffer) required to
* parse off of it. This is used so that routines can hide the loops and
* details required to accumulate up data read off of the file descriptor
* and parse expressions individually out of it.
*/
typedef struct sexp_iowrap {
/**
* Continuation used to parse off of the file descriptor.
*/
pcont_t *cc;
/**
* The file descriptor. Currently CANNOT be a socket since implementation
* uses read(), not recv().
*/
int fd;
/**
* Buffer to read data into before parsing.
*/
char buf[BUFSIZ];
/**
* Byte count for last read. If it is -1, there was an error. Otherwise,
* it will be a value from 0 to BUFSIZ.
*/
size_t cnt;
} sexp_iowrap_t;
/*========*/
/* GLOBAL */
/*========*/
/**
* Global value indicating the most recent error condition encountered.
* This value can be reset to SEXP_ERR_OK by calling sexp_errno_reset().
*/
extern sexp_errcode_t sexp_errno;
/*===========*/
/* FUNCTIONS */
/*===========*/
/* this is for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/**
* \ingroup parser
* Set the parameters on atom value buffer allocation and growth sizes.
* This is an important point for performance tuning, as many factors in
* the expected expression structure must be taken into account such as:
*
* - Average size of atom values
* - Variance in sizes of atom values
* - Amount of memory that is tolerably ''wasted'' (allocated but not
* used)
*
* The \a ss parameter specifies the initial size of all atom buffers.
* Ideally, this should be sufficiently large to capture MOST atom values,
* or at least close enough such that one growth is required. The
* \a gs parameter specifies the number of bytes to increase the buffer size
* by when space is exhausted. A safe choice for parameter sizes would
* be on the order of the average size for \a ss, and one standard
* deviation for \a gs. This ensures that 50% of all expressions are
* guaranteed to fit in the initial buffer, and roughly 80-90% will fit in
* one growth. If memory is not an issue, choosing ss to be the mean plus
* one standard deviation will capture 80-90% of expressions in the initial
* buffer, and a gs of one standard deviation will capture nearly all
* expressions.
*
* Note: These parameters can be tuned at runtime as needs change, and they
* will be applied to all expressions and expression elements parsed after
* they are modified. They will not be applied retroactively to expressions
* that have already been parsed.
*/
sexp_errcode_t set_parser_buffer_params(size_t ss, size_t gs);
/**
* return an allocated sexp_t. This structure may be an already allocated
* one from the stack or a new one if none are available. Use this instead
* of manually mallocing if you want to avoid excessive mallocs. <I>Note:
* Mallocing your own expressions is fine - you can even use
* sexp_t_deallocate to deallocate them and put them in the pool.</I>
* Also, if the stack has not been initialized yet, this does so.
*/
sexp_t *sexp_t_allocate(void);
/**
* given a malloc'd sexp_t element, put it back into the already-allocated
* element stack. This method will allocate a stack if one has not been
* allocated already.
*/
void sexp_t_deallocate(sexp_t *s);
/**
* In the event that someone wants us to release ALL of the memory used
* between calls by the library, they can free it. If you don't call
* this, the caches will be persistent for the lifetime of the library
* user. Note that in the event of an error condition resulting in
* sexp_errno being set, the user might consider calling this to clean up
* any memory that may be lingering around that should be cleaned up.
*/
void sexp_cleanup(void);
/**
* print a sexp_t struct as a string in the LISP style. If the buffer
* is large enough and the conversion is successful, the return value
* represents the length of the string contained in the buffer. If the
* buffer was too small, or some other error occurred, the return
* value is -1 and the contents of the buffer should not be assumed to
* contain any useful information. When the return value is -1, the
* caller should check the contents of sexp_errno for details on what
* error may have occurred.
*/
int print_sexp(char *loc, size_t size, const sexp_t *e);
/**
* print a sexp_t structure to a buffer, growing it as necessary instead
* of relying on fixed size buffers like print_sexp. Important argument
* to tune for performance reasons is <tt>ss</tt> - the
* buffer start size. The growsize used by the CSTRING routines also should
* be considered for tuning via the sgrowsize() function. This routine no
* longer requires the user to specify the growsize, and uses the current
* setting without changing it.
*/
int print_sexp_cstr(CSTRING **s, const sexp_t *e, size_t ss);
/**
* Allocate a new sexp_t element representing a list.
*/
sexp_t *new_sexp_list(sexp_t *l);
/**
* Allocate a new sexp_t element representing a raw binary atom.
* This element will contain a pointer to the raw binary data
* provided, as well as the binary data length. The character
* atom fields will be NULL and the corresponding val
* length and allocation size will be set to zero since this
* element is carrying a binary pointer only.
*/
sexp_t *new_sexp_binary_atom(char *data, size_t binlength);
/**
* Allocate a new sexp_t element representing a value. The user must
* specify the precise type of the atom. This used to default to
* SEXP_BASIC, but this can lead to errors if the user did not expect this
* assumption. By explicitly passing in the atom type, the caller should
* ensure that the data in the buffer is valid given the requested atom
* type. For performance reasons, such checks are left to the caller if
* they are desired, and not performed in the library if they are not
* wanted.
*/
sexp_t *new_sexp_atom(const char *buf, size_t bs, atom_t aty);
/**
* create an initial continuation for parsing the given string
*/
pcont_t *init_continuation(char *str);
/**
* destroy a continuation. This involves cleaning up what it contains,
* and cleaning up the continuation itself.
*/
void destroy_continuation (pcont_t * pc);
/**
* \ingroup IO
* create an IO wrapper structure around a file descriptor. A NULL return
* value indicates some problem occurred allocating the wrapper, so the
* user should check the value of sexp_errno for further information.
*/
sexp_iowrap_t *init_iowrap(int fd);
/**
* \ingroup IO
* destroy an IO wrapper structure. The file descriptor wrapped in the
* wrapper will <B>not</B> be closed, so the caller is responsible
* for manually calling close on the file descriptor.
*/
void destroy_iowrap(sexp_iowrap_t *iow);
/**
* \ingroup IO
* given and IO wrapper handle, read one s-expression
* off of it. this expression may be contained in a continuation,
* so there is no guarantee that under the covers an IO read
* actually is occurring. A return value of NULL can either indicate
* a parser error or no more data on the input IO handle. In the
* event that NULL is returned, the user should check to see if
* sexp_errno contains SEXP_ERR_IO_EMPTY (no more data) or a more
* problematic error.
*/
sexp_t *read_one_sexp(sexp_iowrap_t *iow);
/**
* \ingroup parser
* wrapper around parser for compatibility.
*/
sexp_t *parse_sexp(char *s, size_t len);
/**
* \ingroup parser
* wrapper around parser for friendlier continuation use
* pre-condition : continuation (cc) is NON-NULL!
*/
sexp_t *iparse_sexp(char *s, size_t len, pcont_t *cc);
/**
* \ingroup parser
* given a LISP style s-expression string, parse it into a set of
* connected sexp_t structures.
*/
pcont_t *cparse_sexp(char *s, size_t len, pcont_t *pc);
/**
* given a sexp_t structure, free the memory it uses (and recursively free
* the memory used by all sexp_t structures that it references). Note
* that this will call the deallocation routine for sexp_t elements.
* This means that memory isn't freed, but stored away in a cache of
* pre-allocated elements. This is an optimization to speed up the
* parser to eliminate wasteful free and re-malloc calls. Note: If using
* inlined binary mode, this will free the data pointed to by the bindata
* field. So, if you care about the data after the lifetime of the
* s-expression, make sure to make a copy before cleaning up the sexpr.
*/
void destroy_sexp(sexp_t *s);
/**
* reset the value of sexp_errno to SEXP_ERR_OK.
*/
void reset_sexp_errno();
/**
* print the contents of the parser continuation stack to a buffer.
* this is useful if an expression is partially parsed and the caller
* realizes that something is wrong with it. with this routine,
* the caller can reconstruct the expression parsed so far and use it
* for error reporting. this works with fixed size buffers allocated
* by the caller. there is not a CSTRING-based version currently.
*/
void print_pcont(pcont_t * pc, char * buf, size_t buflen);
/* this is for C++ users */
#ifdef __cplusplus
}
#endif
#include "sexp_ops.h"
#endif /* __SEXP_H__ */