/*
 *  Encoding and decoding basic formats: hex, base64.
 *
 *  These are in-place operations which may allow an optimized implementation.
 *
 *  Base-64: https://tools.ietf.org/html/rfc4648#section-4
 */
|
|
|
|
#include "duk_internal.h"
|
|
|
|
/*
 *  Misc helpers
 */
|
|
|
|
/* Shared handling for encode/decode argument. Fast path handling for
|
|
* buffer and string values because they're the most common. In particular,
|
|
* avoid creating a temporary string or buffer when possible. Return value
|
|
* is guaranteed to be non-NULL, even for zero length input.
|
|
*/
|
|
DUK_LOCAL const duk_uint8_t *duk__prep_codec_arg(duk_hthread *thr, duk_idx_t idx, duk_size_t *out_len) {
|
|
const void *def_ptr = (const void *) out_len; /* Any non-NULL pointer will do. */
|
|
const void *ptr;
|
|
duk_bool_t isbuffer;
|
|
|
|
DUK_ASSERT(out_len != NULL);
|
|
DUK_ASSERT(def_ptr != NULL);
|
|
DUK_ASSERT(duk_is_valid_index(thr, idx)); /* checked by caller */
|
|
|
|
ptr = (const void *) duk_get_buffer_data_raw(thr, idx, out_len, NULL /*def_ptr*/, 0 /*def_size*/, 0 /*throw_flag*/, &isbuffer);
|
|
if (isbuffer) {
|
|
DUK_ASSERT(ptr != NULL || *out_len == 0U);
|
|
if (DUK_UNLIKELY(ptr == NULL)) {
|
|
ptr = def_ptr;
|
|
}
|
|
DUK_ASSERT(ptr != NULL);
|
|
} else {
|
|
/* For strings a non-NULL pointer is always guaranteed because
|
|
* at least a NUL will be present.
|
|
*/
|
|
ptr = (const void *) duk_to_lstring(thr, idx, out_len);
|
|
DUK_ASSERT(ptr != NULL);
|
|
}
|
|
DUK_ASSERT(ptr != NULL);
|
|
return (const duk_uint8_t *) ptr;
|
|
}
|
|
|
|
/*
 *  Base64
 */
|
|
|
|
#if defined(DUK_USE_BASE64_SUPPORT)
|
|
/* Bytes emitted for number of padding characters in range [0,4]. */
|
|
DUK_LOCAL const duk_int8_t duk__base64_decode_nequal_step[5] = {
|
|
3, /* #### -> 24 bits, emit 3 bytes */
|
|
2, /* ###= -> 18 bits, emit 2 bytes */
|
|
1, /* ##== -> 12 bits, emit 1 byte */
|
|
-1, /* #=== -> 6 bits, error */
|
|
0, /* ==== -> 0 bits, emit 0 bytes */
|
|
};
|
|
|
|
#if defined(DUK_USE_BASE64_FASTPATH)
|
|
/* Encode table: 6-bit value (0...63) -> ASCII code of the base-64 character. */
DUK_LOCAL const duk_uint8_t duk__base64_enctab_fast[64] = {
	0x41U, 0x42U, 0x43U, 0x44U, 0x45U, 0x46U, 0x47U, 0x48U, /*  0... 7: A...H */
	0x49U, 0x4aU, 0x4bU, 0x4cU, 0x4dU, 0x4eU, 0x4fU, 0x50U, /*  8...15: I...P */
	0x51U, 0x52U, 0x53U, 0x54U, 0x55U, 0x56U, 0x57U, 0x58U, /* 16...23: Q...X */
	0x59U, 0x5aU, 0x61U, 0x62U, 0x63U, 0x64U, 0x65U, 0x66U, /* 24...31: Y, Z, a...f */
	0x67U, 0x68U, 0x69U, 0x6aU, 0x6bU, 0x6cU, 0x6dU, 0x6eU, /* 32...39: g...n */
	0x6fU, 0x70U, 0x71U, 0x72U, 0x73U, 0x74U, 0x75U, 0x76U, /* 40...47: o...v */
	0x77U, 0x78U, 0x79U, 0x7aU, 0x30U, 0x31U, 0x32U, 0x33U, /* 48...55: w...z, 0...3 */
	0x34U, 0x35U, 0x36U, 0x37U, 0x38U, 0x39U, 0x2bU, 0x2fU  /* 56...63: 4...9, +, / */
};
|
|
#endif /* DUK_USE_BASE64_FASTPATH */
|
|
|
|
#if defined(DUK_USE_BASE64_FASTPATH)
|
|
/* Decode table for one byte of input:
|
|
* -1 = allowed whitespace
|
|
* -2 = padding
|
|
* -3 = error
|
|
* 0...63 decoded bytes
|
|
*/
|
|
DUK_LOCAL const duk_int8_t duk__base64_dectab_fast[256] = {
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, /* 0x00...0x0f */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0x10...0x1f */
|
|
-1, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, 62, -3, -3, -3, 63, /* 0x20...0x2f */
|
|
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -3, -3, -3, -2, -3, -3, /* 0x30...0x3f */
|
|
-3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40...0x4f */
|
|
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -3, -3, -3, -3, -3, /* 0x50...0x5f */
|
|
-3, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60...0x6f */
|
|
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -3, -3, -3, -3, -3, /* 0x70...0x7f */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0x80...0x8f */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0x90...0x9f */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0xa0...0xaf */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0xb0...0xbf */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0xc0...0xcf */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0xd0...0xdf */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, /* 0xe0...0xef */
|
|
-3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3 /* 0xf0...0xff */
|
|
};
|
|
#endif /* DUK_USE_BASE64_FASTPATH */
|
|
|
|
#if defined(DUK_USE_BASE64_FASTPATH)
|
|
/* Encode one full group: reads src[0..2], writes four base-64 characters to
 * dst[0..3].
 *
 * The three input bytes are packed into one 24-bit value first and the four
 * 6-bit fields are then looked up.  (An alternative which indexes the table
 * straight from the individual source bytes was tested earlier and was not
 * faster on x64, most likely due to aliasing between output and input index
 * computation.)
 */
DUK_LOCAL DUK_ALWAYS_INLINE void duk__base64_encode_fast_3(const duk_uint8_t *src, duk_uint8_t *dst) {
	/* aaaaaabb bbbbcccc ccdddddd */
	duk_uint_t bits = (((duk_uint_t) src[0]) << 16) | (((duk_uint_t) src[1]) << 8) | ((duk_uint_t) src[2]);

	dst[0] = duk__base64_enctab_fast[bits >> 18]; /* at most 24 bits, no mask needed */
	dst[1] = duk__base64_enctab_fast[(bits >> 12) & 0x3fU];
	dst[2] = duk__base64_enctab_fast[(bits >> 6) & 0x3fU];
	dst[3] = duk__base64_enctab_fast[bits & 0x3fU];
}
|
|
|
|
/* Encode a trailing 2-byte group: reads src[0..1], writes three base-64
 * characters followed by one '=' to dst[0..3].
 */
DUK_LOCAL DUK_ALWAYS_INLINE void duk__base64_encode_fast_2(const duk_uint8_t *src, duk_uint8_t *dst) {
	/* 16 bits of input: aaaaaabb bbbbcccc */
	duk_uint_t bits = (((duk_uint_t) src[0]) << 8) | ((duk_uint_t) src[1]);

	dst[0] = duk__base64_enctab_fast[bits >> 10];          /* top 6 bits */
	dst[1] = duk__base64_enctab_fast[(bits >> 4) & 0x3fU]; /* middle 6 bits */
	dst[2] = duk__base64_enctab_fast[(bits << 2) & 0x3fU]; /* low 4 bits, zero filled */
	dst[3] = DUK_ASC_EQUALS;
}
|
|
|
|
/* Encode a trailing 1-byte group: reads src[0], writes two base-64
 * characters followed by '==' to dst[0..3].
 */
DUK_LOCAL DUK_ALWAYS_INLINE void duk__base64_encode_fast_1(const duk_uint8_t *src, duk_uint8_t *dst) {
	/* 8 bits of input: aaaaaabb */
	duk_uint_t bits = (duk_uint_t) src[0];

	dst[0] = duk__base64_enctab_fast[bits >> 2];           /* top 6 bits */
	dst[1] = duk__base64_enctab_fast[(bits << 4) & 0x3fU]; /* low 2 bits, zero filled */
	dst[2] = DUK_ASC_EQUALS;
	dst[3] = DUK_ASC_EQUALS;
}
|
|
|
|
/* Table driven base-64 encoder: encodes 'srclen' bytes from 'src' and writes
 * the padded output to 'dst'.  Every started 3-byte input group produces
 * exactly four output characters, so 'dst' must have room for
 * ((srclen + 2) / 3) * 4 bytes.
 */
DUK_LOCAL void duk__base64_encode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst) {
	const duk_uint8_t *in = src;
	duk_uint8_t *out = dst;
	duk_size_t left = srclen;

	if (left >= 16U) {
		/* Bulk phase, unrolled by 4 to allow interleaving: each round
		 * turns a 12-byte input chunk into a 16-character output chunk.
		 * The threshold guarantees at least one round is executed, and
		 * short inputs skip the division entirely.
		 */
		const duk_uint8_t *in_stop = in + (left - (left % 12U));

		DUK_ASSERT(in_stop >= in + 12);
		do {
			duk__base64_encode_fast_3(in, out);
			duk__base64_encode_fast_3(in + 3, out + 4);
			duk__base64_encode_fast_3(in + 6, out + 8);
			duk__base64_encode_fast_3(in + 9, out + 12);
			in += 12;
			out += 16;
		} while (DUK_LIKELY(in != in_stop));

		DUK_ASSERT(src + srclen >= in);
		left = (duk_size_t) (src + srclen - in);
		DUK_ASSERT(left < 12U);
	}

	/* Remaining full groups, one at a time. */
	for (; left >= 3U; left -= 3U) {
		duk__base64_encode_fast_3(in, out);
		in += 3;
		out += 4;
	}

	/* Final partial group, if any.  Pointers are not advanced afterwards
	 * because nothing follows.
	 */
	DUK_ASSERT(left == 0U || left == 1U || left == 2U);
	switch (left) {
	case 1U:
		duk__base64_encode_fast_1(in, out);
		break;
	case 2U:
		duk__base64_encode_fast_2(in, out);
		break;
	default:
		DUK_ASSERT(left == 0U); /* nothing to do */
		break;
	}
}
|
|
#else /* DUK_USE_BASE64_FASTPATH */
|
|
/* Compact base-64 encoder (no lookup table): encodes 'srclen' bytes from
 * 'src' and writes the padded output to 'dst'.  Every started 3-byte input
 * group produces exactly four output characters.
 */
DUK_LOCAL void duk__base64_encode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst) {
	const duk_uint8_t *in = src;
	const duk_uint8_t *in_end = src + srclen;
	duk_uint8_t *out = dst;
	duk_small_uint_t missing = 0U; /* input bytes missing from the final group */

	while (in < in_end) {
		duk_uint_t group = 0;
		duk_small_uint_t k;

		/* Gather three input bytes, substituting zero past the end.
		 * Bytes can only be missing in the last round, after which the
		 * outer loop terminates.
		 */
		for (k = 0; k < 3; k++) {
			group = group << 8;
			if (in < in_end) {
				group += (duk_uint_t) (*in++);
			} else {
				missing++;
			}
		}
		DUK_ASSERT(missing <= 2U);

		/* Emit four characters, most significant 6 bits first.  For a
		 * short final group the trailing characters encode zero bits
		 * and are replaced with padding after the loop.  The ranges
		 * are mapped arithmetically instead of using a 64-byte table;
		 * a table would be faster and cleaner, but this is shorter.
		 */
		for (k = 0; k < 4; k++) {
			duk_uint_t sextet = (group >> 18) & 0x3fU;
			duk_uint_t ch;

			group = group << 6;

			if (sextet <= 25U) {
				ch = sextet + DUK_ASC_UC_A;
			} else if (sextet <= 51U) {
				ch = sextet - 26 + DUK_ASC_LC_A;
			} else if (sextet <= 61U) {
				ch = sextet - 52 + DUK_ASC_0;
			} else if (sextet == 62) {
				ch = DUK_ASC_PLUS;
			} else {
				DUK_ASSERT(sextet == 63);
				ch = DUK_ASC_SLASH;
			}

			*out++ = (duk_uint8_t) ch;
		}
	}

	/* Overwrite the 0-2 bogus trailing characters with padding:
	 *
	 *   missing bytes   output tail
	 *   0               ####
	 *   1               ###=
	 *   2               ##==
	 */
	DUK_ASSERT(missing <= 2U);
	for (; missing > 0U; missing--) {
		*(out - missing) = DUK_ASC_EQUALS;
	}
}
|
|
#endif /* DUK_USE_BASE64_FASTPATH */
|
|
|
|
#if defined(DUK_USE_BASE64_FASTPATH)
|
|
/* Table driven base-64 decoder.
 *
 * Decodes 'srclen' bytes from 'src' into 'dst'.  Returns 1 on success and
 * stores the end of the decoded data (one past the last kept byte) into
 * '*out_dst_final'.  Returns 0 on invalid input, in which case
 * '*out_dst_final' is not written.
 *
 * Whole groups are stored at the output position before it is known how many
 * bytes are kept (6 bytes in the fast path, 3 bytes in the slow path), so the
 * caller must size 'dst' with a safety margin.
 *
 * 'src' must be non-NULL even for zero length input.
 */
DUK_LOCAL duk_bool_t duk__base64_decode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst, duk_uint8_t **out_dst_final) {
	duk_int_t x;
	duk_uint_t t;
	duk_small_uint_t n_equal;
	duk_int8_t step;
	const duk_uint8_t *p;
	const duk_uint8_t *p_end;
	duk_uint8_t *q;

	DUK_ASSERT(src != NULL); /* Required by pointer arithmetic below, which fails for NULL. */

	p = src;
	p_end = src + srclen;
	q = dst;

	/* Alternate between a fast path which processes clean groups with no
	 * padding or whitespace, and a slow path which processes one arbitrary
	 * group and then re-enters the fast path.  This handles e.g. base64
	 * with newlines reasonably well because the majority of a line is in
	 * the fast path.
	 */
	for (;;) {
		/* Fast path, on each loop handle two 4-char input groups.
		 * If both are clean, emit 6 bytes and continue.  If first
		 * is clean, emit 3 bytes and drop out; otherwise emit
		 * nothing and drop out.  This approach could be extended to
		 * more groups per loop, but for inputs with e.g. periodic
		 * newlines (which are common) it might not be an improvement.
		 *
		 * The loop condition compares the number of remaining input
		 * bytes rather than using a precomputed 'p_end - 8' pointer:
		 * for inputs shorter than 8 bytes such a pointer would lie
		 * before the start of the input, which is undefined pointer
		 * arithmetic.  'p' never exceeds 'p_end', so the difference
		 * is never negative.
		 */
		DUK_ASSERT(p <= p_end);
		while (DUK_LIKELY((duk_size_t) (p_end - p) >= 8U)) {
			duk_int_t t1, t2;

			/* The lookup byte is intentionally sign extended to
			 * (at least) 32 bits and then ORed.  This ensures
			 * that if at least 1 byte is negative, the highest
			 * bit of the accumulator will be set at the end and
			 * we don't need to check every byte.
			 *
			 * Read all input bytes first before writing output
			 * bytes to minimize aliasing.
			 */
			DUK_DDD(DUK_DDDPRINT("fast loop: p=%p, p_end=%p",
			                     (const void *) p, (const void *) p_end));

			t1 = (duk_int_t) duk__base64_dectab_fast[p[0]];
			t1 = (duk_int_t) ((duk_uint_t) t1 << 6) | (duk_int_t) duk__base64_dectab_fast[p[1]];
			t1 = (duk_int_t) ((duk_uint_t) t1 << 6) | (duk_int_t) duk__base64_dectab_fast[p[2]];
			t1 = (duk_int_t) ((duk_uint_t) t1 << 6) | (duk_int_t) duk__base64_dectab_fast[p[3]];

			t2 = (duk_int_t) duk__base64_dectab_fast[p[4]];
			t2 = (duk_int_t) ((duk_uint_t) t2 << 6) | (duk_int_t) duk__base64_dectab_fast[p[5]];
			t2 = (duk_int_t) ((duk_uint_t) t2 << 6) | (duk_int_t) duk__base64_dectab_fast[p[6]];
			t2 = (duk_int_t) ((duk_uint_t) t2 << 6) | (duk_int_t) duk__base64_dectab_fast[p[7]];

			/* Stored unconditionally; only kept if 'q' is advanced below. */
			q[0] = (duk_uint8_t) (((duk_uint_t) t1 >> 16) & 0xffU);
			q[1] = (duk_uint8_t) (((duk_uint_t) t1 >> 8) & 0xffU);
			q[2] = (duk_uint8_t) ((duk_uint_t) t1 & 0xffU);

			q[3] = (duk_uint8_t) (((duk_uint_t) t2 >> 16) & 0xffU);
			q[4] = (duk_uint8_t) (((duk_uint_t) t2 >> 8) & 0xffU);
			q[5] = (duk_uint8_t) ((duk_uint_t) t2 & 0xffU);

			/* Optimistic check using one branch. */
			if (DUK_LIKELY((t1 | t2) >= 0)) {
				p += 8;
				q += 6;
			} else if (t1 >= 0) {
				DUK_DDD(DUK_DDDPRINT("fast loop first group was clean, second was not, process one slow path group"));
				DUK_ASSERT(t2 < 0);
				p += 4;
				q += 3;
				break;
			} else {
				DUK_DDD(DUK_DDDPRINT("fast loop first group was not clean, second does not matter, process one slow path group"));
				DUK_ASSERT(t1 < 0);
				break;
			}
		} /* fast path */

		/* Slow path step 1: try to scan a 4-character encoded group,
		 * end-of-input, or start-of-padding.  We exit with:
		 *   1. n_chars == 4: full group, no padding, no end-of-input.
		 *   2. n_chars < 4: partial group (may also be 0), encountered
		 *      padding or end of input.
		 *
		 * The accumulator is initialized to 1; this allows us to detect
		 * a full group by comparing >= 0x1000000 without an extra
		 * counter variable.
		 */
		t = 1UL;
		for (;;) {
			DUK_DDD(DUK_DDDPRINT("slow loop: p=%p, p_end=%p, t=%lu",
			                     (const void *) p, (const void *) p_end, (unsigned long) t));

			if (DUK_LIKELY(p < p_end)) {
				x = duk__base64_dectab_fast[*p++];
				if (DUK_LIKELY(x >= 0)) {
					DUK_ASSERT(x >= 0 && x <= 63);
					t = (t << 6) + (duk_uint_t) x;
					if (t >= 0x1000000UL) {
						break;
					}
				} else if (x == -1) {
					continue; /* allowed ascii whitespace */
				} else if (x == -2) {
					p--;
					break; /* start of padding */
				} else {
					DUK_ASSERT(x == -3);
					goto decode_error;
				}
			} else {
				break; /* end of input */
			}
		} /* slow path step 1 */

		/* Complete the padding by simulating pad characters,
		 * regardless of actual input padding chars.
		 */
		n_equal = 0;
		while (t < 0x1000000UL) {
			t = (t << 6) + 0U;
			n_equal++;
		}

		/* Slow path step 2: deal with full/partial group, padding,
		 * etc.  Note that for num chars in [0,3] we intentionally emit
		 * 3 bytes but don't step forward that much, buffer space is
		 * guaranteed in setup.
		 *
		 *  num chars:
		 *  0      ====   no output (= step 0)
		 *  1      #===   reject, 6 bits of data
		 *  2      ##==   12 bits of data, output 1 byte (= step 1)
		 *  3      ###=   18 bits of data, output 2 bytes (= step 2)
		 *  4      ####   24 bits of data, output 3 bytes (= step 3)
		 */
		q[0] = (duk_uint8_t) ((t >> 16) & 0xffU);
		q[1] = (duk_uint8_t) ((t >> 8) & 0xffU);
		q[2] = (duk_uint8_t) (t & 0xffU);

		DUK_ASSERT(n_equal <= 4);
		step = duk__base64_decode_nequal_step[n_equal];
		if (DUK_UNLIKELY(step < 0)) {
			goto decode_error;
		}
		q += step;

		/* Slow path step 3: read and ignore padding and whitespace
		 * until (a) next non-padding and non-whitespace character
		 * after which we resume the fast path, or (b) end of input.
		 * This allows us to accept missing, partial, full, and extra
		 * padding cases uniformly.  We also support concatenated
		 * base-64 documents because we resume scanning afterwards.
		 *
		 * Note that to support concatenated documents well, the '='
		 * padding found inside the input must also allow for 'extra'
		 * padding.  For example, 'Zm===' decodes to 'f' and has one
		 * extra padding char.  So, 'Zm===Zm' should decode 'ff', even
		 * though the standard break-up would be 'Zm==' + '=Zm' which
		 * doesn't make sense.
		 *
		 * We also accept prepended padding like '==Zm9', because it
		 * is equivalent to an empty document with extra padding ('==')
		 * followed by a valid document.
		 */
		for (;;) {
			if (DUK_UNLIKELY(p >= p_end)) {
				goto done;
			}
			x = duk__base64_dectab_fast[*p++];
			if (x == -1 || x == -2) {
				; /* padding or whitespace, keep eating */
			} else {
				/* Backtrack and go back to the fast path.  This
				 * is done even for -3 (invalid character), which
				 * is then rejected by slow path step 1.
				 */
				p--;
				break;
			}
		} /* slow path step 3 */
	} /* outer fast+slow path loop */

 done:
	DUK_DDD(DUK_DDDPRINT("done; p=%p, p_end=%p",
	                     (const void *) p, (const void *) p_end));

	DUK_ASSERT(p == p_end);

	*out_dst_final = q;
	return 1;

 decode_error:
	return 0;
}
|
|
#else /* DUK_USE_BASE64_FASTPATH */
|
|
/* Compact base-64 decoder (no lookup table).
 *
 * Decodes 'srclen' bytes from 'src' into 'dst'.  Returns 1 on success and
 * stores the end of the decoded data into '*out_dst_final'; returns 0 on
 * invalid input without writing '*out_dst_final'.
 *
 * Three bytes are always stored at the output position when a group is
 * finished, even if fewer are kept, so the caller must size 'dst' with a
 * safety margin.
 */
DUK_LOCAL duk_bool_t duk__base64_decode_helper(const duk_uint8_t *src, duk_size_t srclen, duk_uint8_t *dst, duk_uint8_t **out_dst_final) {
	/* Control characters accepted as whitespace: 0x09, 0x0a, 0x0d. */
	const duk_uint32_t ctrl_white = (1U << 9) | (1U << 10) | (1U << 13);
	const duk_uint8_t *in = src;
	const duk_uint8_t *in_end = src + srclen;
	duk_uint8_t *out = dst;

	/* 'acc' accumulates the current group and doubles as a character
	 * counter because it starts from 1:
	 *
	 *   acc == 1           no valid chars yet
	 *   acc >= 0x40        1x6 = 6 bits shifted in
	 *   acc >= 0x1000      2x6 = 12 bits shifted in
	 *   acc >= 0x40000     3x6 = 18 bits shifted in
	 *   acc >= 0x1000000   4x6 = 24 bits shifted in
	 */
	duk_uint_t acc = 1UL;

	for (;;) {
		duk_small_uint_t n_pad;
		duk_int8_t adv;
		duk_uint_t ch;
		duk_int_t sextet;

		DUK_ASSERT(acc >= 1U);

		if (in < in_end) {
			ch = *in++;

			if (ch == 0x3dU) {
				/* '=': between groups it is simply skipped,
				 * inside a group it finishes the group
				 * (handled below).
				 */
				if (acc == 1U) {
					continue;
				}
			} else {
				if (ch >= 0x61U && ch <= 0x7aU) {
					sextet = (duk_int_t) ch - 0x61 + 26; /* a-z */
				} else if (ch >= 0x41U && ch <= 0x5aU) {
					sextet = (duk_int_t) ch - 0x41; /* A-Z */
				} else if (ch >= 0x30U && ch <= 0x39U) {
					sextet = (duk_int_t) ch - 0x30 + 52; /* 0-9 */
				} else if (ch == 0x2bU) {
					sextet = 62; /* + */
				} else if (ch == 0x2fU) {
					sextet = 63; /* / */
				} else if (ch == 0x20U || (ch < 0x20U && (ctrl_white & (1U << ch)) != 0U)) {
					continue; /* allow basic ASCII whitespace */
				} else {
					goto decode_error;
				}

				DUK_ASSERT(sextet >= 0 && sextet <= 63);
				acc = (acc << 6) + (duk_uint_t) sextet;
				if (acc < 0x1000000UL) {
					continue; /* group not yet full */
				}
				/* Full group, no padding will be added below. */
			}
		} else if (acc == 1U) {
			/* End of input with no pending group: done. */
			break;
		}
		/* Otherwise: end of input or '=' with a partial group pending;
		 * treat like start of padding and finish the group.
		 */

		/* Complete the group by shifting in simulated padding. */
		n_pad = 0;
		while (acc < 0x1000000UL) {
			acc = (acc << 6) + 0U;
			n_pad++;
		}

		/* Store 3 bytes from the accumulator, keep only 'adv' of them. */
		out[0] = (duk_uint8_t) ((acc >> 16) & 0xffU);
		out[1] = (duk_uint8_t) ((acc >> 8) & 0xffU);
		out[2] = (duk_uint8_t) (acc & 0xffU);

		DUK_ASSERT(n_pad <= 4U);
		adv = duk__base64_decode_nequal_step[n_pad];
		if (adv < 0) {
			goto decode_error;
		}
		out += adv;

		/* Start a new group.  Any actual padding characters still in
		 * the input are skipped by the '=' handling above.  This
		 * handles missing, partial, full, and extra padding, and
		 * allows parsing of concatenated documents (with extra
		 * padding) like: Zm===Zm.  Also extra prepended padding is
		 * accepted: ===Zm9v.
		 */
		acc = 1U;
	}
	DUK_ASSERT(acc == 1UL);

	*out_dst_final = out;
	return 1;

 decode_error:
	return 0;
}
|
|
#endif /* DUK_USE_BASE64_FASTPATH */
|
|
|
|
/* Base-64 encode the value at 'idx' in place.
 *
 * The input bytes are obtained with duk__prep_codec_arg().  The encoded
 * result replaces the value at 'idx' as a string and a pointer to that
 * string is returned.  A TypeError is thrown when the input is too long for
 * the output length to be computed safely.
 */
DUK_EXTERNAL const char *duk_base64_encode(duk_hthread *thr, duk_idx_t idx) {
	const duk_uint8_t *in;
	duk_uint8_t *out;
	duk_size_t in_len;
	duk_size_t out_len;
	const char *res;

	DUK_ASSERT_API_ENTRY(thr);

	idx = duk_require_normalize_index(thr, idx);
	in = duk__prep_codec_arg(thr, idx, &in_len);
	DUK_ASSERT(in != NULL);

	/* The exact output length computation below must not wrap.  The
	 * limit is chosen so that it works for a 32-bit size_t:
	 *
	 *   >>> srclen = 3221225469
	 *   >>> '%x' % ((srclen + 2) / 3 * 4)
	 *   'fffffffc'
	 */
	if (in_len > 3221225469UL) {
		DUK_ERROR_TYPE(thr, DUK_STR_BASE64_ENCODE_FAILED);
		DUK_WO_NORETURN(return NULL;);
	}

	/* Four output characters per started 3-byte group; no zeroing is
	 * needed because the helper fills the whole buffer.
	 */
	out_len = (in_len + 2U) / 3U * 4U;
	out = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, out_len);

	duk__base64_encode_helper(in, in_len, out);

	res = duk_buffer_to_string(thr, -1); /* Safe, result is ASCII. */
	duk_replace(thr, idx);
	return res;
}
|
|
|
|
/* Base-64 decode the value at 'idx' in place.
 *
 * The input bytes are obtained with duk__prep_codec_arg().  The decoded
 * data replaces the value at 'idx' as a dynamic buffer.  A TypeError is
 * thrown when the input is not valid base-64.
 */
DUK_EXTERNAL void duk_base64_decode(duk_hthread *thr, duk_idx_t idx) {
	const duk_uint8_t *in;
	duk_size_t in_len;
	duk_uint8_t *out;
	duk_uint8_t *out_end;
	duk_size_t out_alloc;

	DUK_ASSERT_API_ENTRY(thr);

	idx = duk_require_normalize_index(thr, idx);
	in = duk__prep_codec_arg(thr, idx, &in_len);
	DUK_ASSERT(in != NULL);

	/* Upper limit for the output (assuming no whitespace etc) plus a
	 * safety margin.  Division is done before anything is added to avoid
	 * any possibility of wrapping.  The margin includes +3 for rounding
	 * up, and +3 for one extra group: the decoder may store a full group
	 * (3 bytes) and then backtrack it even for zero-sized input.
	 * Similarly, 'xx' may cause 1+3 bytes to be stored and then partly
	 * backtracked.
	 */
	out_alloc = (in_len / 4) * 3 + 6;
	out = (duk_uint8_t *) duk_push_dynamic_buffer(thr, out_alloc);

	if (!duk__base64_decode_helper(in, in_len, out, &out_end)) {
		DUK_ERROR_TYPE(thr, DUK_STR_BASE64_DECODE_FAILED);
		DUK_WO_NORETURN(return;);
	}

	/* Shrink to the number of bytes actually kept by the decoder. */
	/* XXX: convert to fixed buffer? */
	(void) duk_resize_buffer(thr, -1, (duk_size_t) (out_end - out));
	duk_replace(thr, idx);
}
|
|
#else /* DUK_USE_BASE64_SUPPORT */
|
|
/* Stub used when base-64 support is disabled: always throws an
 * "unsupported" error.  'idx' is ignored.
 */
DUK_EXTERNAL const char *duk_base64_encode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr); /* same entry check as the other public entry points */
	DUK_UNREF(idx);
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return NULL;);
}
|
|
|
|
/* Stub used when base-64 support is disabled: always throws an
 * "unsupported" error.  'idx' is ignored.
 */
DUK_EXTERNAL void duk_base64_decode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr); /* same entry check as the other public entry points */
	DUK_UNREF(idx);
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return;);
}
|
|
#endif /* DUK_USE_BASE64_SUPPORT */
|
|
|
|
/*
 *  Hex
 */
|
|
|
|
#if defined(DUK_USE_HEX_SUPPORT)
|
|
/* Hex encode the value at 'idx' in place.
 *
 * The input bytes are obtained with duk__prep_codec_arg().  The encoded
 * result (two output bytes per input byte) replaces the value at 'idx' as a
 * string and a pointer to that string is returned.
 */
DUK_EXTERNAL const char *duk_hex_encode(duk_hthread *thr, duk_idx_t idx) {
	const duk_uint8_t *inp;
	duk_size_t len;
	duk_size_t i;
	duk_uint8_t *buf;
	const char *ret;
#if defined(DUK_USE_HEX_FASTPATH)
	duk_size_t len_safe;
	duk_uint16_t *p16;
#endif

	DUK_ASSERT_API_ENTRY(thr);

	idx = duk_require_normalize_index(thr, idx);
	inp = duk__prep_codec_arg(thr, idx, &len);
	DUK_ASSERT(inp != NULL);

	/* Fixed buffer, no zeroing because we'll fill all the data.
	 *
	 * NOTE(review): 'len * 2' is not checked for wrapping, unlike the
	 * explicit input limit in duk_base64_encode(); presumably buffer
	 * size limits elsewhere make a wrapping length unreachable -- confirm.
	 */
	buf = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, len * 2);
	DUK_ASSERT(buf != NULL);

#if defined(DUK_USE_HEX_FASTPATH)
	/* Each input byte maps to one 16-bit table entry which is stored as
	 * two output bytes (presumably the two hex digits in memory order --
	 * see the definition of duk_hex_enctab).
	 */
	DUK_ASSERT((((duk_size_t) buf) & 0x01U) == 0); /* pointer is aligned, guaranteed for fixed buffer */
	p16 = (duk_uint16_t *) (void *) buf;

	/* Round down to a multiple of 4 for the unrolled loop.  The mask is
	 * built in duk_size_t: a plain '~0x03U' is an unsigned int which
	 * would be zero extended when duk_size_t is wider, clearing the high
	 * bits of the length and leaving most of a very large input to the
	 * slower tail loop.
	 */
	len_safe = len & ~((duk_size_t) 0x03U);
	for (i = 0; i < len_safe; i += 4) {
		p16[0] = duk_hex_enctab[inp[i]];
		p16[1] = duk_hex_enctab[inp[i + 1]];
		p16[2] = duk_hex_enctab[inp[i + 2]];
		p16[3] = duk_hex_enctab[inp[i + 3]];
		p16 += 4;
	}
	/* Tail: 0-3 remaining input bytes. */
	for (; i < len; i++) {
		*p16++ = duk_hex_enctab[inp[i]];
	}
#else /* DUK_USE_HEX_FASTPATH */
	for (i = 0; i < len; i++) {
		duk_small_uint_t t;
		t = (duk_small_uint_t) inp[i];
		buf[i * 2 + 0] = duk_lc_digits[t >> 4];   /* high nybble */
		buf[i * 2 + 1] = duk_lc_digits[t & 0x0f]; /* low nybble */
	}
#endif /* DUK_USE_HEX_FASTPATH */

	/* XXX: Using a string return value forces a string intern which is
	 * not always necessary.  As a rough performance measure, hex encode
	 * time for tests/perf/test-hex-encode.js dropped from ~35s to ~15s
	 * without string coercion.  Change to returning a buffer and let the
	 * caller coerce to string if necessary?
	 */

	ret = duk_buffer_to_string(thr, -1); /* Safe, result is ASCII. */
	duk_replace(thr, idx);
	return ret;
}
|
|
|
|
/* Hex decode the value at 'idx' in place.
 *
 * The input bytes are obtained with duk__prep_codec_arg().  The decoded
 * data (one output byte per two input characters) replaces the value at
 * 'idx' as a fixed buffer.  A TypeError is thrown when the input length is
 * odd or when a character lookup yields a negative (invalid) value.
 */
DUK_EXTERNAL void duk_hex_decode(duk_hthread *thr, duk_idx_t idx) {
	const duk_uint8_t *inp;
	duk_size_t len;
	duk_size_t i;
	duk_int_t t;
	duk_uint8_t *buf;
#if defined(DUK_USE_HEX_FASTPATH)
	duk_int_t chk;
	duk_uint8_t *p;
	duk_size_t len_safe;
#endif

	DUK_ASSERT_API_ENTRY(thr);

	idx = duk_require_normalize_index(thr, idx);
	inp = duk__prep_codec_arg(thr, idx, &len);
	DUK_ASSERT(inp != NULL);

	/* Every output byte needs exactly two input characters. */
	if (len & 0x01) {
		goto type_error;
	}

	/* Fixed buffer, no zeroing because we'll fill all the data. */
	buf = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, len / 2);
	DUK_ASSERT(buf != NULL);

#if defined(DUK_USE_HEX_FASTPATH)
	p = buf;

	/* Round down to a multiple of 8 for the unrolled loop.  The mask is
	 * built in duk_size_t: a plain '~0x07U' is an unsigned int which
	 * would be zero extended when duk_size_t is wider, clearing the high
	 * bits of the length and leaving most of a very large input to the
	 * slower tail loop.
	 */
	len_safe = len & ~((duk_size_t) 0x07U);
	for (i = 0; i < len_safe; i += 8) {
		/* Decode four bytes per round.  Lookup results are ORed into
		 * 'chk' so that a single sign check covers all eight input
		 * characters; output bytes are stored before the check.
		 */
		t = ((duk_int_t) duk_hex_dectab_shift4[inp[i]]) |
		    ((duk_int_t) duk_hex_dectab[inp[i + 1]]);
		chk = t;
		p[0] = (duk_uint8_t) t;
		t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 2]]) |
		    ((duk_int_t) duk_hex_dectab[inp[i + 3]]);
		chk |= t;
		p[1] = (duk_uint8_t) t;
		t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 4]]) |
		    ((duk_int_t) duk_hex_dectab[inp[i + 5]]);
		chk |= t;
		p[2] = (duk_uint8_t) t;
		t = ((duk_int_t) duk_hex_dectab_shift4[inp[i + 6]]) |
		    ((duk_int_t) duk_hex_dectab[inp[i + 7]]);
		chk |= t;
		p[3] = (duk_uint8_t) t;
		p += 4;

		/* Check if any lookup above had a negative result. */
		if (DUK_UNLIKELY(chk < 0)) {
			goto type_error;
		}
	}
	/* Tail: remaining character pairs, one output byte at a time. */
	for (; i < len; i += 2) {
		/* First cast to duk_int_t to sign extend, second cast to
		 * duk_uint_t to avoid signed left shift, and final cast to
		 * duk_int_t result type.
		 */
		t = (duk_int_t) ((((duk_uint_t) (duk_int_t) duk_hex_dectab[inp[i]]) << 4U) |
		                 ((duk_uint_t) (duk_int_t) duk_hex_dectab[inp[i + 1]]));
		if (DUK_UNLIKELY(t < 0)) {
			goto type_error;
		}
		*p++ = (duk_uint8_t) t;
	}
#else /* DUK_USE_HEX_FASTPATH */
	for (i = 0; i < len; i += 2) {
		/* For invalid characters the value -1 gets extended to
		 * at least 16 bits.  If either nybble is invalid, the
		 * resulting 't' will be < 0.
		 */
		t = (duk_int_t) ((((duk_uint_t) (duk_int_t) duk_hex_dectab[inp[i]]) << 4U) |
		                 ((duk_uint_t) (duk_int_t) duk_hex_dectab[inp[i + 1]]));
		if (DUK_UNLIKELY(t < 0)) {
			goto type_error;
		}
		buf[i >> 1] = (duk_uint8_t) t;
	}
#endif /* DUK_USE_HEX_FASTPATH */

	duk_replace(thr, idx);
	return;

 type_error:
	DUK_ERROR_TYPE(thr, DUK_STR_HEX_DECODE_FAILED);
	DUK_WO_NORETURN(return;);
}
|
|
#else /* DUK_USE_HEX_SUPPORT */
|
|
/* Stub used when hex support is disabled: always throws an "unsupported"
 * error.  'idx' is ignored.
 */
DUK_EXTERNAL const char *duk_hex_encode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr); /* same entry check as the other public entry points */
	DUK_UNREF(idx);
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return NULL;);
}
|
|
/* Stub used when hex support is disabled: always throws an "unsupported"
 * error.  'idx' is ignored.
 */
DUK_EXTERNAL void duk_hex_decode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr); /* same entry check as the other public entry points */
	DUK_UNREF(idx);
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return;);
}
|
|
#endif /* DUK_USE_HEX_SUPPORT */
|
|
|
|
/*
 *  JSON
 */
|
|
|
|
#if defined(DUK_USE_JSON_SUPPORT)
|
|
/* JSON encode the value at 'idx' in place.
 *
 * Runs the JSON stringify helper on the value (no replacer, no space
 * argument, no flags), replaces the value at 'idx' with the helper's result
 * and returns the string pointer read back from 'idx'.  The value stack top
 * is the same on exit as on entry.
 */
DUK_EXTERNAL const char *duk_json_encode(duk_hthread *thr, duk_idx_t idx) {
	const char *json;
#if defined(DUK_USE_ASSERTIONS)
	duk_idx_t entry_top; /* only needed for the stack balance assert */
#endif

	DUK_ASSERT_API_ENTRY(thr);
#if defined(DUK_USE_ASSERTIONS)
	entry_top = duk_get_top(thr);
#endif

	idx = duk_require_normalize_index(thr, idx);
	duk_bi_json_stringify_helper(thr, idx /*idx_value*/, DUK_INVALID_INDEX /*idx_replacer*/, DUK_INVALID_INDEX /*idx_space*/, 0 /*flags*/);

	/* The result is on the stack top; move it over the original value
	 * and only then read the pointer from its final position.
	 */
	DUK_ASSERT(duk_is_string(thr, -1));
	duk_replace(thr, idx);
	json = duk_get_string(thr, idx);

	DUK_ASSERT(duk_get_top(thr) == entry_top);

	return json;
}
|
|
|
|
/* JSON decode the value at 'idx' in place.
 *
 * Runs the JSON parse helper on the value (no reviver, no flags) and
 * replaces the value at 'idx' with the value the helper leaves on the stack
 * top.  The value stack top is the same on exit as on entry.
 */
DUK_EXTERNAL void duk_json_decode(duk_hthread *thr, duk_idx_t idx) {
#if defined(DUK_USE_ASSERTIONS)
	duk_idx_t entry_top; /* only needed for the stack balance assert */
#endif

	DUK_ASSERT_API_ENTRY(thr);
#if defined(DUK_USE_ASSERTIONS)
	entry_top = duk_get_top(thr);
#endif

	idx = duk_require_normalize_index(thr, idx);
	duk_bi_json_parse_helper(thr, idx /*idx_value*/, DUK_INVALID_INDEX /*idx_reviver*/, 0 /*flags*/);
	duk_replace(thr, idx);

	DUK_ASSERT(duk_get_top(thr) == entry_top);
}
|
|
#else /* DUK_USE_JSON_SUPPORT */
|
|
/* Stub used when JSON support is disabled: always throws an "unsupported"
 * error.  'idx' is ignored.
 */
DUK_EXTERNAL const char *duk_json_encode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr);
	DUK_UNREF(idx); /* argument intentionally unused */
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return NULL;);
}
|
|
|
|
/* Stub used when JSON support is disabled: always throws an "unsupported"
 * error.  'idx' is ignored.
 */
DUK_EXTERNAL void duk_json_decode(duk_hthread *thr, duk_idx_t idx) {
	DUK_ASSERT_API_ENTRY(thr);
	DUK_UNREF(idx); /* argument intentionally unused */
	DUK_ERROR_UNSUPPORTED(thr);
	DUK_WO_NORETURN(return;);
}
|
|
#endif /* DUK_USE_JSON_SUPPORT */
|