forked from rc0/mairix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mairix.h
415 lines (359 loc) · 12.7 KB
/
mairix.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
/*
mairix - message index builder and finder for maildir folders.
**********************************************************************
* Copyright (C) Richard P. Curnow 2002,2003,2004,2005,2006
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
**********************************************************************
*/
#ifndef MAIRIX_H
#define MAIRIX_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "memmac.h"
enum message_type {/*{{{*/
MTY_DEAD, /* msg no longer exists, i.e. don't report in searches,
prune it on a '-p' run. */
MTY_FILE, /* msg <-> file in 1-1 correspondence e.g. maildir, MH */
MTY_MBOX, /* multiple msgs per file : MBOX format file */
MTY_IMAP /* Message on IMAP server; syntax: uidvalidity:uid:folder */
};
/*}}}*/
enum folder_type {/*{{{*/
FT_MAILDIR,
FT_MH,
FT_MBOX,
FT_RAW,
FT_EXCERPT,
FT_IMAP
};
/*}}}*/
struct msgpath {/*{{{*/
enum message_type type; /* selector for union 'src' */
union {
struct {
char *path;
size_t size; /* size of the message in bytes */
time_t mtime; /* mtime of message file on disc */
} mpf; /* message per file */
struct {
int file_index; /* index into table of mbox files */
int msg_index; /* index of message within the file */
} mbox; /* for messages in mbox format folders */
} src;
/* Now fields that are common to both types of message. */
time_t date; /* representation of Date: header in message */
int tid; /* thread-id */
/* Track the folder type this came from, so we know the difference
between MH and Maildir, both of which have type MTY_FILE. */
enum folder_type source_ft;
/* Message flags. */
unsigned int seen:1;
unsigned int replied:1;
unsigned int flagged:1;
/* + other stuff eventually */
};
/*}}}*/
struct msgpath_array {/*{{{*/
struct msgpath *paths;
int n;
int max;
};
/*}}}*/
struct matches {/*{{{*/
unsigned char *msginfo;
int n; /* bytes in use */
int max; /* bytes allocated */
unsigned long highest;
};
/*}}}*/
struct token {/*{{{*/
char *text;
unsigned long hashval;
/* to store delta-compressed info of which msgpaths match the token */
struct matches match0;
};
/*}}}*/
struct token2 {/*{{{*/
char *text;
unsigned long hashval;
/* to store delta-compressed info of which msgpaths match the token */
struct matches match0;
struct matches match1;
};
/*}}}*/
struct toktable {/*{{{*/
struct token **tokens;
int n; /* # in use */
int size; /* # allocated */
unsigned int mask; /* for masking down hash values */
int hwm; /* number to have before expanding */
};
/*}}}*/
struct toktable2 {/*{{{*/
struct token2 **tokens;
int n; /* # in use */
int size; /* # allocated */
unsigned int mask; /* for masking down hash values */
int hwm; /* number to have before expanding */
};
/*}}}*/
enum content_type {/*{{{*/
CT_TEXT_PLAIN,
CT_TEXT_HTML,
CT_TEXT_OTHER,
CT_MESSAGE_RFC822,
CT_OTHER
};
/*}}}*/
struct rfc822;
struct attachment {/*{{{*/
struct attachment *next;
struct attachment *prev;
enum content_type ct;
char *filename;
union attachment_body {
struct normal_attachment_body {
int len;
char *bytes;
} normal;
struct rfc822 *rfc822;
} data;
};
/*}}}*/
struct headers {/*{{{*/
char *to;
char *cc;
char *from;
char *subject;
/* The following are needed to support threading */
char *message_id;
char *in_reply_to;
char *references;
struct {
unsigned int seen:1;
unsigned int replied:1;
unsigned int flagged:1;
} flags;
time_t date;
};
/*}}}*/
struct rfc822 {/*{{{*/
struct headers hdrs;
struct attachment atts;
};
/*}}}*/
typedef char checksum_t[16];
struct message_list {/*{{{*/
struct message_list *next;
off_t start;
size_t len;
};
/*}}}*/
struct mbox {/*{{{*/
/* If path==NULL, this indicates that the mbox is dead, i.e. no longer
* exists. */
char *path;
/* As read in from database (i.e. current last time mairix scan was run.) */
time_t file_mtime;
size_t file_size;
/* As found in the filesystem now. */
time_t current_mtime;
size_t current_size;
/* After reconciling a loaded database with what's on the disc, this entry
stores how many of the msgs that used to be there last time are still
present at the head of the file. Thus, all messages beyond that are
treated as dead, and scanning starts at that point to find 'new' messages
(whch may actually be old ones that have moved, but they're treated as
new.) */
int n_old_msgs_valid;
/* Hold list of new messages and their number. Number is temporary -
* eventually just list walking in case >=2 have to be reattached. */
struct message_list *new_msgs;
int n_new_msgs;
int n_so_far; /* Used during database load. */
int n_msgs; /* Number of entries in 'start' and 'len' */
int max_msgs; /* Allocated size of 'start' and 'len' */
/* File offset to the start of each message (first line of real header, not to mbox 'From ' line) */
off_t *start;
/* Length of each message */
size_t *len;
/* Checksums on whole messages. */
checksum_t *check_all;
};
/*}}}*/
struct database {/*{{{*/
/* Used to hold an entire mapping between an array of filenames, each
containing a single message, and the sets of tokens that occur in various
parts of those messages */
enum message_type *type;
struct msgpath *msgs; /* Paths to messages */
int n_msgs; /* Number in use */
int max_msgs; /* Space allocated */
struct mbox *mboxen;
int n_mboxen; /* number in use. */
int max_mboxen; /* space allocated */
/* Seed for hashing in the token tables. Randomly created for
* each new database - avoid DoS attacks through carefully
* crafted messages. */
unsigned int hash_key;
/* Token tables */
struct toktable *to;
struct toktable *cc;
struct toktable *from;
struct toktable *subject;
struct toktable *body;
struct toktable *attachment_name;
/* Encoding chain 0 stores all msgids appearing in the following message headers:
* Message-Id, In-Reply-To, References. Used for thread reconciliation.
* Encoding chain 1 stores just the Message-Id. Used for search by message ID.
*/
struct toktable2 *msg_ids;
};
/*}}}*/
struct string_list {/*{{{*/
struct string_list *next;
struct string_list *prev;
char *data;
};
/*}}}*/
struct msg_src {
enum {MS_FILE, MS_MBOX} type;
char *filename;
off_t start;
size_t len;
};
/* Outcomes of checking a filename/dirname to see whether to keep on looking
* at filenames within this dir. */
enum traverse_check {
TRAV_PROCESS, /* Continue looking at this entry */
TRAV_IGNORE, /* Ignore just this dir entry */
TRAV_FINISH /* Ignore this dir entry and don't bother looking at the rest of the directory */
};
struct traverse_methods {
int (*filter)(const char *, const struct stat *);
enum traverse_check (*scrutinize)(int, const char *);
};
extern struct traverse_methods maildir_traverse_methods;
extern struct traverse_methods mh_traverse_methods;
extern struct traverse_methods mbox_traverse_methods;
extern int verbose; /* cmd line -v switch */
extern int do_hardlinks; /* cmd line -H switch */
/* Lame fix for systems where NAME_MAX isn't defined after including the above
* set of .h files (Solaris, FreeBSD so far). Probably grossly oversized but
* it'll do. */
#if !defined(NAME_MAX)
#define NAME_MAX 4096
#endif
/* In glob.c */
struct globber;
struct globber_array;
struct globber *make_globber(const char *wildstring);
void free_globber(struct globber *old);
int is_glob_match(struct globber *g, const char *s);
struct globber_array *colon_sep_string_to_globber_array(const char *in);
int is_globber_array_match(struct globber_array *ga, const char *s);
void free_globber_array(struct globber_array *in);
void free_string_array(int n, char ***arr);
/* In hash.c */
unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval);
/* In dirscan.c */
struct msgpath_array *new_msgpath_array(void);
int valid_mh_filename_p(const char *x);
void free_msgpath_array(struct msgpath_array *x);
void string_list_to_array(struct string_list *list, int *n, char ***arr);
void split_on_colons(const char *str, int *n, char ***arr);
void build_message_list(char *folder_base, char *folders, enum folder_type ft,
struct msgpath_array *msgs, struct globber_array *omit_globs);
/* In rfc822.c */
struct rfc822 *make_rfc822(char *filename);
void free_rfc822(struct rfc822 *msg);
enum data_to_rfc822_error {
DTR8_OK,
DTR8_MISSING_END, /* missing endpoint marker. */
DTR8_MULTIPART_SANS_BOUNDARY, /* multipart with no boundary string defined */
DTR8_BAD_HEADERS, /* corrupt headers */
DTR8_BAD_ATTACHMENT /* corrupt attachment (e.g. no body part) */
};
struct rfc822 *data_to_rfc822(struct msg_src *src, char *data, int length, enum data_to_rfc822_error *error);
enum ro_map_compressed_behaviour {
MAP_DECOMPRESS_IF_APPLICABLE,
MAP_NO_DECOMPRESSION
};
void create_ro_mapping(const char *filename, unsigned char **data, int *len, enum ro_map_compressed_behaviour cb);
void free_ro_mapping(unsigned char *data, int len);
char *format_msg_src(struct msg_src *src);
/* In tok.c */
struct toktable *new_toktable(void);
struct toktable2 *new_toktable2(void);
void free_token(struct token *x);
void free_token2(struct token2 *x);
void free_toktable(struct toktable *x);
void free_toktable2(struct toktable2 *x);
void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table);
void check_and_enlarge_encoding(struct matches *m);
void insert_index_on_encoding(struct matches *m, int idx);
void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1);
/* In db.c */
#define CREATE_RANDOM_DATABASE_HASH 0
struct database *new_database(unsigned int hash_key);
struct database *new_database_from_file(char *db_filename, int do_integrity_checks);
void free_database(struct database *db);
void maybe_grow_message_arrays(struct database *db);
void tokenise_message(int file_index, struct database *db, struct rfc822 *msg);
struct imap_ll;
int update_database(struct database *db, struct msgpath *sorted_paths, int n_paths, int do_fast_index, struct imap_ll *);
void check_database_integrity(struct database *db);
int cull_dead_messages(struct database *db, int do_integrity_checks);
/* In mbox.c */
void build_mbox_lists(struct database *db, const char *folder_base,
const char *mboxen_paths, struct globber_array *omit_globs,
int do_mbox_symlinks);
int add_mbox_messages(struct database *db);
void compute_checksum(const char *data, size_t len, checksum_t *csum);
void cull_dead_mboxen(struct database *db);
unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg);
void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg);
int verify_mbox_size_constraints(struct database *db);
void glob_and_expand_paths(const char *folder_base, char **paths_in, int n_in, char ***paths_out, int *n_out, const struct traverse_methods *methods, struct globber_array *omit_globs);
void free_mboxen(struct database *db);
/* In glob.c */
struct globber;
struct globber *make_globber(const char *wildstring);
void free_globber(struct globber *old);
int is_glob_match(struct globber *g, const char *s);
/* In writer.c */
void write_database(struct database *db, char *filename, int do_integrity_checks);
/* In search.c */
int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose, const char *imap_pipe, const char *imap_server, const char *imap_username, const char *imap_password, int sort_by_date);
/* In stats.c */
void get_db_stats(struct database *db);
/* In dates.c */
int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end);
/* In dumper.c */
void dump_database(char *filename);
/* In strexpand.c */
char *expand_string(const char *p);
/* In dotlock.c */
void lock_database(char *path, int forced_unlock);
void unlock_database(void);
void unlock_and_exit(int code);
/* In mairix.c */
void report_error(const char *str, const char *filename);
#endif /* MAIRIX_H */