-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathshoco.c
223 lines (192 loc) · 6.2 KB
/
shoco.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#include <stdint.h>
#if (defined (__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || __BIG_ENDIAN__)
#define swap(x) (x)
#else
#if defined(_MSC_VER)
#include <stdlib.h>
#define swap(x) _byteswap_ulong(x)
#elif defined (__GNUC__)
#if defined(__builtin_bswap32)
#define swap(x) __builtin_bswap32(x)
#else
#define swap(x) ((x<<24) + ((x&0x0000FF00)<<8) + ((x&0x00FF0000)>>8) + (x>>24))
#endif
#else
#include <byteswap.h>
#define swap(x) bswap_32(x)
#endif
#endif
#if defined(_MSC_VER)
#define _ALIGNED __declspec(align(16))
#define inline __inline
#elif defined(__GNUC__)
#define _ALIGNED __attribute__ ((aligned(16)))
#else
#define _ALIGNED
#endif
#if defined(_M_X64) || defined (_M_AMD64) || defined (__x86_64__)
#include "emmintrin.h"
#define HAVE_SSE2
#endif
#include "shoco.h"
#define _SHOCO_INTERNAL
#include "shoco_model.h"
static inline int decode_header(unsigned char val) {
int i = -1;
while ((signed char)val < 0) {
val <<= 1;
++i;
}
return i;
}
union Code {
uint32_t word;
char bytes[4];
};
#ifdef HAVE_SSE2
static inline int check_indices(const int16_t * shoco_restrict indices, int pack_n) {
__m128i zero = _mm_setzero_si128();
__m128i indis = _mm_load_si128 ((__m128i *)indices);
__m128i masks = _mm_load_si128 ((__m128i *)packs[pack_n].masks);
__m128i cmp = _mm_cmpgt_epi16 (indis, masks);
__m128i mmask = _mm_cmpgt_epi16 (masks, zero);
cmp = _mm_and_si128 (cmp, mmask);
int result = _mm_movemask_epi8 (cmp);
return (result == 0);
}
#else
static inline int check_indices(const int16_t * shoco_restrict indices, int pack_n) {
for (unsigned int i = 0; i < packs[pack_n].bytes_unpacked; ++i)
if (indices[i] > packs[pack_n].masks[i])
return 0;
return 1;
}
#endif
static inline int find_best_encoding(const int16_t * shoco_restrict indices, unsigned int n_consecutive) {
for (int p = PACK_COUNT - 1; p >= 0; --p)
if ((n_consecutive >= packs[p].bytes_unpacked) && (check_indices(indices, p)))
return p;
return -1;
}
size_t shoco_compress(const char * const shoco_restrict original, size_t strlen, char * const shoco_restrict out, size_t bufsize) {
char *o = out;
char * const out_end = out + bufsize;
const char *in = original;
int16_t _ALIGNED indices[MAX_SUCCESSOR_N + 1] = { 0 };
int last_chr_index;
int current_index;
int successor_index;
unsigned int n_consecutive;
union Code code;
int pack_n;
unsigned int rest;
const char * const in_end = original + strlen;
while ((*in != '\0')) {
if (strlen && (in == in_end))
break;
// find the longest string of known successors
indices[0] = chr_ids_by_chr[(unsigned char)in[0]];
last_chr_index = indices[0];
if (last_chr_index < 0)
goto last_resort;
rest = in_end - in;
for (n_consecutive = 1; n_consecutive <= MAX_SUCCESSOR_N; ++n_consecutive) {
if (strlen && (n_consecutive == rest))
break;
current_index = chr_ids_by_chr[(unsigned char)in[n_consecutive]];
if (current_index < 0) // '\0' is always -1
break;
successor_index = successor_ids_by_chr_id_and_chr_id[last_chr_index][current_index];
if (successor_index < 0)
break;
indices[n_consecutive] = (int16_t)successor_index;
last_chr_index = current_index;
}
if (n_consecutive < 2)
goto last_resort;
pack_n = find_best_encoding(indices, n_consecutive);
if (pack_n >= 0) {
if (o + packs[pack_n].bytes_packed > out_end)
return bufsize + 1;
code.word = packs[pack_n].word;
for (unsigned int i = 0; i < packs[pack_n].bytes_unpacked; ++i)
code.word |= indices[i] << packs[pack_n].offsets[i];
// In the little-endian world, we need to swap what's
// in the register to match the memory representation.
// On big-endian systems, this is a dummy.
code.word = swap(code.word);
// if we'd just copy the word, we might write over the end
// of the output string
for (unsigned int i = 0; i < packs[pack_n].bytes_packed; ++i)
o[i] = code.bytes[i];
o += packs[pack_n].bytes_packed;
in += packs[pack_n].bytes_unpacked;
} else {
last_resort:
if (*in & 0x80) {
// non-ascii case
if (o + 2 > out_end)
return bufsize + 1;
// put in a sentinel byte
*o++ = 0x00;
} else {
// an ascii byte
if (o + 1 > out_end)
return bufsize + 1;
}
*o++ = *in++;
}
}
return o - out;
}
size_t shoco_decompress(const char * const shoco_restrict original, size_t complen, char * const shoco_restrict out, size_t bufsize) {
char *o = out;
char * const out_end = out + bufsize;
const char *in = original;
char last_chr;
union Code code = { 0 };
int offset;
int mask;
int mark;
const char * const in_end = original + complen;
while (in < in_end) {
mark = decode_header(*in);
if (mark < 0) {
if (o >= out_end)
return bufsize + 1;
// ignore the sentinel value for non-ascii chars
if (*in == 0x00) {
if (++in >= in_end)
return SIZE_MAX;
}
*o++ = *in++;
} else {
if (o + packs[mark].bytes_unpacked > out_end)
return bufsize + 1;
else if (in + packs[mark].bytes_packed > in_end)
return SIZE_MAX;
// This should be OK as well, but it fails with emscripten.
// Test this with new versions of emcc.
//code.word = swap(*(uint32_t *)in);
for (unsigned int i = 0; i < packs[mark].bytes_packed; ++i)
code.bytes[i] = in[i];
code.word = swap(code.word);
// unpack the leading char
offset = packs[mark].offsets[0];
mask = packs[mark].masks[0];
last_chr = o[0] = chrs_by_chr_id[(code.word >> offset) & mask];
// unpack the successor chars
for (unsigned int i = 1; i < packs[mark].bytes_unpacked; ++i) {
offset = packs[mark].offsets[i];
mask = packs[mark].masks[i];
last_chr = o[i] = chrs_by_chr_and_successor_id[(unsigned char)last_chr - MIN_CHR][(code.word >> offset) & mask];
}
o += packs[mark].bytes_unpacked;
in += packs[mark].bytes_packed;
}
}
// append a 0-terminator if it fits
if (o < out_end)
*o = '\0';
return o - out;
}