534 lines
17 KiB
C
534 lines
17 KiB
C
|
/*
|
||
|
* WHATWG Encoding API built-ins
|
||
|
*
|
||
|
* API specification: https://encoding.spec.whatwg.org/#api
|
||
|
* Web IDL: https://www.w3.org/TR/WebIDL/
|
||
|
*/
|
||
|
|
||
|
#include "duk_internal.h"
|
||
|
|
||
|
/*
|
||
|
* Data structures for encoding/decoding
|
||
|
*/
|
||
|
|
||
|
typedef struct {
|
||
|
duk_uint8_t *out; /* where to write next byte(s) */
|
||
|
duk_codepoint_t lead; /* lead surrogate */
|
||
|
} duk__encode_context;
|
||
|
|
||
|
typedef struct {
|
||
|
/* UTF-8 decoding state */
|
||
|
duk_codepoint_t codepoint; /* built up incrementally */
|
||
|
duk_uint8_t upper; /* max value of next byte (decode error otherwise) */
|
||
|
duk_uint8_t lower; /* min value of next byte (ditto) */
|
||
|
duk_uint8_t needed; /* how many more bytes we need */
|
||
|
duk_uint8_t bom_handled; /* BOM seen or no longer expected */
|
||
|
|
||
|
/* Decoder configuration */
|
||
|
duk_uint8_t fatal;
|
||
|
duk_uint8_t ignore_bom;
|
||
|
} duk__decode_context;
|
||
|
|
||
|
/* The signed duk_codepoint_t type is used to signal a decoded codepoint
|
||
|
* (>= 0) or various other states using negative values.
|
||
|
*/
|
||
|
#define DUK__CP_CONTINUE (-1) /* continue to next byte, no completed codepoint */
|
||
|
#define DUK__CP_ERROR (-2) /* decoding error */
|
||
|
#define DUK__CP_RETRY (-3) /* decoding error; retry last byte */
|
||
|
|
||
|
/*
|
||
|
* Raw helpers for encoding/decoding
|
||
|
*/
|
||
|
|
||
|
/* Emit UTF-8 (= CESU-8) encoded U+FFFD (replacement char), i.e. ef bf bd. */
|
||
|
DUK_LOCAL duk_uint8_t *duk__utf8_emit_repl(duk_uint8_t *ptr) {
|
||
|
*ptr++ = 0xef;
|
||
|
*ptr++ = 0xbf;
|
||
|
*ptr++ = 0xbd;
|
||
|
return ptr;
|
||
|
}
|
||
|
|
||
|
DUK_LOCAL void duk__utf8_decode_init(duk__decode_context *dec_ctx) {
|
||
|
/* (Re)init the decoding state of 'dec_ctx' but leave decoder
|
||
|
* configuration fields untouched.
|
||
|
*/
|
||
|
dec_ctx->codepoint = 0x0000L;
|
||
|
dec_ctx->upper = 0xbf;
|
||
|
dec_ctx->lower = 0x80;
|
||
|
dec_ctx->needed = 0;
|
||
|
dec_ctx->bom_handled = 0;
|
||
|
}
|
||
|
|
||
|
DUK_LOCAL duk_codepoint_t duk__utf8_decode_next(duk__decode_context *dec_ctx, duk_uint8_t x) {
|
||
|
/*
|
||
|
* UTF-8 algorithm based on the Encoding specification:
|
||
|
* https://encoding.spec.whatwg.org/#utf-8-decoder
|
||
|
*
|
||
|
* Two main states: decoding initial byte vs. decoding continuation
|
||
|
* bytes. Shortest length encoding is validated by restricting the
|
||
|
* allowed range of first continuation byte using 'lower' and 'upper'.
|
||
|
*/
|
||
|
|
||
|
if (dec_ctx->needed == 0) {
|
||
|
/* process initial byte */
|
||
|
if (x <= 0x7f) {
|
||
|
/* U+0000-U+007F, 1 byte (ASCII) */
|
||
|
return (duk_codepoint_t) x;
|
||
|
} else if (x >= 0xc2 && x <= 0xdf) {
|
||
|
/* U+0080-U+07FF, 2 bytes */
|
||
|
dec_ctx->needed = 1;
|
||
|
dec_ctx->codepoint = x & 0x1f;
|
||
|
DUK_ASSERT(dec_ctx->lower == 0x80);
|
||
|
DUK_ASSERT(dec_ctx->upper == 0xbf);
|
||
|
return DUK__CP_CONTINUE;
|
||
|
} else if (x >= 0xe0 && x <= 0xef) {
|
||
|
/* U+0800-U+FFFF, 3 bytes */
|
||
|
if (x == 0xe0) {
|
||
|
dec_ctx->lower = 0xa0;
|
||
|
DUK_ASSERT(dec_ctx->upper == 0xbf);
|
||
|
} else if (x == 0xed) {
|
||
|
DUK_ASSERT(dec_ctx->lower == 0x80);
|
||
|
dec_ctx->upper = 0x9f;
|
||
|
}
|
||
|
dec_ctx->needed = 2;
|
||
|
dec_ctx->codepoint = x & 0x0f;
|
||
|
return DUK__CP_CONTINUE;
|
||
|
} else if (x >= 0xf0 && x <= 0xf4) {
|
||
|
/* U+010000-U+10FFFF, 4 bytes */
|
||
|
if (x == 0xf0) {
|
||
|
dec_ctx->lower = 0x90;
|
||
|
DUK_ASSERT(dec_ctx->upper == 0xbf);
|
||
|
} else if (x == 0xf4) {
|
||
|
DUK_ASSERT(dec_ctx->lower == 0x80);
|
||
|
dec_ctx->upper = 0x8f;
|
||
|
}
|
||
|
dec_ctx->needed = 3;
|
||
|
dec_ctx->codepoint = x & 0x07;
|
||
|
return DUK__CP_CONTINUE;
|
||
|
} else {
|
||
|
/* not a legal initial byte */
|
||
|
return DUK__CP_ERROR;
|
||
|
}
|
||
|
} else {
|
||
|
/* process continuation byte */
|
||
|
if (x >= dec_ctx->lower && x <= dec_ctx->upper) {
|
||
|
dec_ctx->lower = 0x80;
|
||
|
dec_ctx->upper = 0xbf;
|
||
|
dec_ctx->codepoint = (dec_ctx->codepoint << 6) | (x & 0x3f);
|
||
|
if (--dec_ctx->needed > 0) {
|
||
|
/* need more bytes */
|
||
|
return DUK__CP_CONTINUE;
|
||
|
} else {
|
||
|
/* got a codepoint */
|
||
|
duk_codepoint_t ret;
|
||
|
DUK_ASSERT(dec_ctx->codepoint <= 0x10ffffL); /* Decoding rules guarantee. */
|
||
|
ret = dec_ctx->codepoint;
|
||
|
dec_ctx->codepoint = 0x0000L;
|
||
|
dec_ctx->needed = 0;
|
||
|
return ret;
|
||
|
}
|
||
|
} else {
|
||
|
/* We just encountered an illegal UTF-8 continuation byte. This might
|
||
|
* be the initial byte of the next character; if we return a plain
|
||
|
* error status and the decoder is in replacement mode, the character
|
||
|
* will be masked. We still need to alert the caller to the error
|
||
|
* though.
|
||
|
*/
|
||
|
dec_ctx->codepoint = 0x0000L;
|
||
|
dec_ctx->needed = 0;
|
||
|
dec_ctx->lower = 0x80;
|
||
|
dec_ctx->upper = 0xbf;
|
||
|
return DUK__CP_RETRY;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if defined(DUK_USE_ENCODING_BUILTINS)
|
||
|
DUK_LOCAL void duk__utf8_encode_char(void *udata, duk_codepoint_t codepoint) {
|
||
|
duk__encode_context *enc_ctx;
|
||
|
|
||
|
DUK_ASSERT(codepoint >= 0);
|
||
|
enc_ctx = (duk__encode_context *) udata;
|
||
|
DUK_ASSERT(enc_ctx != NULL);
|
||
|
|
||
|
#if !defined(DUK_USE_PREFER_SIZE)
|
||
|
if (codepoint <= 0x7f && enc_ctx->lead == 0x0000L) {
|
||
|
/* Fast path for ASCII. */
|
||
|
*enc_ctx->out++ = (duk_uint8_t) codepoint;
|
||
|
return;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (DUK_UNLIKELY(codepoint > 0x10ffffL)) {
|
||
|
/* cannot legally encode in UTF-8 */
|
||
|
codepoint = DUK_UNICODE_CP_REPLACEMENT_CHARACTER;
|
||
|
} else if (codepoint >= 0xd800L && codepoint <= 0xdfffL) {
|
||
|
if (codepoint <= 0xdbffL) {
|
||
|
/* high surrogate */
|
||
|
duk_codepoint_t prev_lead = enc_ctx->lead;
|
||
|
enc_ctx->lead = codepoint;
|
||
|
if (prev_lead == 0x0000L) {
|
||
|
/* high surrogate, no output */
|
||
|
return;
|
||
|
} else {
|
||
|
/* consecutive high surrogates, consider first one unpaired */
|
||
|
codepoint = DUK_UNICODE_CP_REPLACEMENT_CHARACTER;
|
||
|
}
|
||
|
} else {
|
||
|
/* low surrogate */
|
||
|
if (enc_ctx->lead != 0x0000L) {
|
||
|
codepoint = (duk_codepoint_t) (0x010000L + ((enc_ctx->lead - 0xd800L) << 10) + (codepoint - 0xdc00L));
|
||
|
enc_ctx->lead = 0x0000L;
|
||
|
} else {
|
||
|
/* unpaired low surrogate */
|
||
|
DUK_ASSERT(enc_ctx->lead == 0x0000L);
|
||
|
codepoint = DUK_UNICODE_CP_REPLACEMENT_CHARACTER;
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
if (enc_ctx->lead != 0x0000L) {
|
||
|
/* unpaired high surrogate: emit replacement character and the input codepoint */
|
||
|
enc_ctx->lead = 0x0000L;
|
||
|
enc_ctx->out = duk__utf8_emit_repl(enc_ctx->out);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Codepoint may be original input, a decoded surrogate pair, or may
|
||
|
* have been replaced with U+FFFD.
|
||
|
*/
|
||
|
enc_ctx->out += duk_unicode_encode_xutf8((duk_ucodepoint_t) codepoint, enc_ctx->out);
|
||
|
}
|
||
|
#endif /* DUK_USE_ENCODING_BUILTINS */
|
||
|
|
||
|
/* Shared helper for buffer-to-string using a TextDecoder() compatible UTF-8
|
||
|
* decoder.
|
||
|
*/
|
||
|
DUK_LOCAL duk_ret_t duk__decode_helper(duk_hthread *thr, duk__decode_context *dec_ctx) {
|
||
|
const duk_uint8_t *input;
|
||
|
duk_size_t len = 0;
|
||
|
duk_size_t len_tmp;
|
||
|
duk_bool_t stream = 0;
|
||
|
duk_codepoint_t codepoint;
|
||
|
duk_uint8_t *output;
|
||
|
const duk_uint8_t *in;
|
||
|
duk_uint8_t *out;
|
||
|
|
||
|
DUK_ASSERT(dec_ctx != NULL);
|
||
|
|
||
|
/* Careful with input buffer pointer: any side effects involving
|
||
|
* code execution (e.g. getters, coercion calls, and finalizers)
|
||
|
* may cause a resize and invalidate a pointer we've read. This
|
||
|
* is why the pointer is actually looked up at the last minute.
|
||
|
* Argument validation must still happen first to match WHATWG
|
||
|
* required side effect order.
|
||
|
*/
|
||
|
|
||
|
if (duk_is_undefined(thr, 0)) {
|
||
|
duk_push_fixed_buffer_nozero(thr, 0);
|
||
|
duk_replace(thr, 0);
|
||
|
}
|
||
|
(void) duk_require_buffer_data(thr, 0, &len); /* Need 'len', avoid pointer. */
|
||
|
|
||
|
if (duk_check_type_mask(thr, 1, DUK_TYPE_MASK_UNDEFINED |
|
||
|
DUK_TYPE_MASK_NULL |
|
||
|
DUK_TYPE_MASK_NONE)) {
|
||
|
/* Use defaults, treat missing value like undefined. */
|
||
|
} else {
|
||
|
duk_require_type_mask(thr, 1, DUK_TYPE_MASK_UNDEFINED |
|
||
|
DUK_TYPE_MASK_NULL |
|
||
|
DUK_TYPE_MASK_LIGHTFUNC |
|
||
|
DUK_TYPE_MASK_BUFFER |
|
||
|
DUK_TYPE_MASK_OBJECT);
|
||
|
if (duk_get_prop_literal(thr, 1, "stream")) {
|
||
|
stream = duk_to_boolean(thr, -1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Allowance is 3*len in the general case because all bytes may potentially
|
||
|
* become U+FFFD. If the first byte completes a non-BMP codepoint it will
|
||
|
* decode to a CESU-8 surrogate pair (6 bytes) so we allow 3 extra bytes to
|
||
|
* compensate: (1*3)+3 = 6. Non-BMP codepoints are safe otherwise because
|
||
|
* the 4->6 expansion is well under the 3x allowance.
|
||
|
*
|
||
|
* XXX: As with TextEncoder, need a better buffer allocation strategy here.
|
||
|
*/
|
||
|
if (len >= (DUK_HBUFFER_MAX_BYTELEN / 3) - 3) {
|
||
|
DUK_ERROR_TYPE(thr, DUK_STR_RESULT_TOO_LONG);
|
||
|
DUK_WO_NORETURN(return 0;);
|
||
|
}
|
||
|
output = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, 3 + (3 * len)); /* used parts will be always manually written over */
|
||
|
|
||
|
input = (const duk_uint8_t *) duk_get_buffer_data(thr, 0, &len_tmp);
|
||
|
DUK_ASSERT(input != NULL || len == 0);
|
||
|
if (DUK_UNLIKELY(len != len_tmp)) {
|
||
|
/* Very unlikely but possible: source buffer was resized by
|
||
|
* a side effect when fixed buffer was pushed. Output buffer
|
||
|
* may not be large enough to hold output, so just fail if
|
||
|
* length has changed.
|
||
|
*/
|
||
|
DUK_D(DUK_DPRINT("input buffer resized by side effect, fail"));
|
||
|
goto fail_type;
|
||
|
}
|
||
|
|
||
|
/* From this point onwards it's critical that no side effect occur
|
||
|
* which may disturb 'input': finalizer execution, property accesses,
|
||
|
* active coercions, etc. Even an allocation related mark-and-sweep
|
||
|
* may affect the pointer because it may trigger a pending finalizer.
|
||
|
*/
|
||
|
|
||
|
in = input;
|
||
|
out = output;
|
||
|
while (in < input + len) {
|
||
|
codepoint = duk__utf8_decode_next(dec_ctx, *in++);
|
||
|
if (codepoint < 0) {
|
||
|
if (codepoint == DUK__CP_CONTINUE) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Decoding error with or without retry. */
|
||
|
DUK_ASSERT(codepoint == DUK__CP_ERROR || codepoint == DUK__CP_RETRY);
|
||
|
if (codepoint == DUK__CP_RETRY) {
|
||
|
--in; /* retry last byte */
|
||
|
}
|
||
|
/* replacement mode: replace with U+FFFD */
|
||
|
codepoint = DUK_UNICODE_CP_REPLACEMENT_CHARACTER;
|
||
|
if (dec_ctx->fatal) {
|
||
|
/* fatal mode: throw a TypeError */
|
||
|
goto fail_type;
|
||
|
}
|
||
|
/* Continue with 'codepoint', Unicode replacement. */
|
||
|
}
|
||
|
DUK_ASSERT(codepoint >= 0x0000L && codepoint <= 0x10ffffL);
|
||
|
|
||
|
if (!dec_ctx->bom_handled) {
|
||
|
dec_ctx->bom_handled = 1;
|
||
|
if (codepoint == 0xfeffL && !dec_ctx->ignore_bom) {
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
out += duk_unicode_encode_cesu8((duk_ucodepoint_t) codepoint, out);
|
||
|
DUK_ASSERT(out <= output + (3 + (3 * len)));
|
||
|
}
|
||
|
|
||
|
if (!stream) {
|
||
|
if (dec_ctx->needed != 0) {
|
||
|
/* truncated sequence at end of buffer */
|
||
|
if (dec_ctx->fatal) {
|
||
|
goto fail_type;
|
||
|
} else {
|
||
|
out += duk_unicode_encode_cesu8(DUK_UNICODE_CP_REPLACEMENT_CHARACTER, out);
|
||
|
DUK_ASSERT(out <= output + (3 + (3 * len)));
|
||
|
}
|
||
|
}
|
||
|
duk__utf8_decode_init(dec_ctx); /* Initialize decoding state for potential reuse. */
|
||
|
}
|
||
|
|
||
|
/* Output buffer is fixed and thus stable even if there had been
|
||
|
* side effects (which there shouldn't be).
|
||
|
*/
|
||
|
duk_push_lstring(thr, (const char *) output, (duk_size_t) (out - output));
|
||
|
return 1;
|
||
|
|
||
|
fail_type:
|
||
|
DUK_ERROR_TYPE(thr, DUK_STR_UTF8_DECODE_FAILED);
|
||
|
DUK_WO_NORETURN(return 0;);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Built-in bindings
|
||
|
*/
|
||
|
|
||
|
#if defined(DUK_USE_ENCODING_BUILTINS)
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textencoder_constructor(duk_hthread *thr) {
|
||
|
/* TextEncoder currently requires no persistent state, so the constructor
|
||
|
* does nothing on purpose.
|
||
|
*/
|
||
|
|
||
|
duk_require_constructor_call(thr);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textencoder_prototype_encoding_getter(duk_hthread *thr) {
|
||
|
duk_push_literal(thr, "utf-8");
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textencoder_prototype_encode(duk_hthread *thr) {
|
||
|
duk__encode_context enc_ctx;
|
||
|
duk_size_t len;
|
||
|
duk_size_t final_len;
|
||
|
duk_uint8_t *output;
|
||
|
|
||
|
DUK_ASSERT_TOP(thr, 1);
|
||
|
if (duk_is_undefined(thr, 0)) {
|
||
|
len = 0;
|
||
|
} else {
|
||
|
duk_hstring *h_input;
|
||
|
|
||
|
h_input = duk_to_hstring(thr, 0);
|
||
|
DUK_ASSERT(h_input != NULL);
|
||
|
|
||
|
len = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_input);
|
||
|
if (len >= DUK_HBUFFER_MAX_BYTELEN / 3) {
|
||
|
DUK_ERROR_TYPE(thr, DUK_STR_RESULT_TOO_LONG);
|
||
|
DUK_WO_NORETURN(return 0;);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Allowance is 3*len because all bytes can potentially be replaced with
|
||
|
* U+FFFD -- which rather inconveniently encodes to 3 bytes in UTF-8.
|
||
|
* Rely on dynamic buffer data pointer stability: no other code has
|
||
|
* access to the data pointer.
|
||
|
*
|
||
|
* XXX: The buffer allocation strategy used here is rather inefficient.
|
||
|
* Maybe switch to a chunk-based strategy, or preprocess the string to
|
||
|
* figure out the space needed ahead of time?
|
||
|
*/
|
||
|
DUK_ASSERT(3 * len >= len);
|
||
|
output = (duk_uint8_t *) duk_push_dynamic_buffer(thr, 3 * len);
|
||
|
|
||
|
if (len > 0) {
|
||
|
DUK_ASSERT(duk_is_string(thr, 0)); /* True if len > 0. */
|
||
|
|
||
|
/* XXX: duk_decode_string() is used to process the input
|
||
|
* string. For standard ECMAScript strings, represented
|
||
|
* internally as CESU-8, this is fine. However, behavior
|
||
|
* beyond CESU-8 is not very strict: codepoints using an
|
||
|
* extended form of UTF-8 are also accepted, and invalid
|
||
|
* codepoint sequences (which are allowed in Duktape strings)
|
||
|
* are not handled as well as they could (e.g. invalid
|
||
|
* continuation bytes may mask following codepoints).
|
||
|
* This is how ECMAScript code would also see such strings.
|
||
|
* Maybe replace duk_decode_string() with an explicit strict
|
||
|
* CESU-8 decoder here?
|
||
|
*/
|
||
|
enc_ctx.lead = 0x0000L;
|
||
|
enc_ctx.out = output;
|
||
|
duk_decode_string(thr, 0, duk__utf8_encode_char, (void *) &enc_ctx);
|
||
|
if (enc_ctx.lead != 0x0000L) {
|
||
|
/* unpaired high surrogate at end of string */
|
||
|
enc_ctx.out = duk__utf8_emit_repl(enc_ctx.out);
|
||
|
DUK_ASSERT(enc_ctx.out <= output + (3 * len));
|
||
|
}
|
||
|
|
||
|
/* The output buffer is usually very much oversized, so shrink it to
|
||
|
* actually needed size. Pointer stability assumed up to this point.
|
||
|
*/
|
||
|
DUK_ASSERT_TOP(thr, 2);
|
||
|
DUK_ASSERT(output == (duk_uint8_t *) duk_get_buffer_data(thr, -1, NULL));
|
||
|
|
||
|
final_len = (duk_size_t) (enc_ctx.out - output);
|
||
|
duk_resize_buffer(thr, -1, final_len);
|
||
|
/* 'output' and 'enc_ctx.out' are potentially invalidated by the resize. */
|
||
|
} else {
|
||
|
final_len = 0;
|
||
|
}
|
||
|
|
||
|
/* Standard WHATWG output is a Uint8Array. Here the Uint8Array will
|
||
|
* be backed by a dynamic buffer which differs from e.g. Uint8Arrays
|
||
|
* created as 'new Uint8Array(N)'. ECMAScript code won't see the
|
||
|
* difference but C code will. When bufferobjects are not supported,
|
||
|
* returns a plain dynamic buffer.
|
||
|
*/
|
||
|
#if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
|
||
|
duk_push_buffer_object(thr, -1, 0, final_len, DUK_BUFOBJ_UINT8ARRAY);
|
||
|
#endif
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textdecoder_constructor(duk_hthread *thr) {
|
||
|
duk__decode_context *dec_ctx;
|
||
|
duk_bool_t fatal = 0;
|
||
|
duk_bool_t ignore_bom = 0;
|
||
|
|
||
|
DUK_ASSERT_TOP(thr, 2);
|
||
|
duk_require_constructor_call(thr);
|
||
|
if (!duk_is_undefined(thr, 0)) {
|
||
|
/* XXX: For now ignore 'label' (encoding identifier). */
|
||
|
duk_to_string(thr, 0);
|
||
|
}
|
||
|
if (!duk_is_null_or_undefined(thr, 1)) {
|
||
|
if (duk_get_prop_literal(thr, 1, "fatal")) {
|
||
|
fatal = duk_to_boolean(thr, -1);
|
||
|
}
|
||
|
if (duk_get_prop_literal(thr, 1, "ignoreBOM")) {
|
||
|
ignore_bom = duk_to_boolean(thr, -1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
duk_push_this(thr);
|
||
|
|
||
|
/* The decode context is not assumed to be zeroed; all fields are
|
||
|
* initialized explicitly.
|
||
|
*/
|
||
|
dec_ctx = (duk__decode_context *) duk_push_fixed_buffer(thr, sizeof(duk__decode_context));
|
||
|
dec_ctx->fatal = (duk_uint8_t) fatal;
|
||
|
dec_ctx->ignore_bom = (duk_uint8_t) ignore_bom;
|
||
|
duk__utf8_decode_init(dec_ctx); /* Initializes remaining fields. */
|
||
|
|
||
|
duk_put_prop_literal(thr, -2, DUK_INTERNAL_SYMBOL("Context"));
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* Get TextDecoder context from 'this'; leaves garbage on stack. */
|
||
|
DUK_LOCAL duk__decode_context *duk__get_textdecoder_context(duk_hthread *thr) {
|
||
|
duk__decode_context *dec_ctx;
|
||
|
duk_push_this(thr);
|
||
|
duk_get_prop_literal(thr, -1, DUK_INTERNAL_SYMBOL("Context"));
|
||
|
dec_ctx = (duk__decode_context *) duk_require_buffer(thr, -1, NULL);
|
||
|
DUK_ASSERT(dec_ctx != NULL);
|
||
|
return dec_ctx;
|
||
|
}
|
||
|
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textdecoder_prototype_shared_getter(duk_hthread *thr) {
|
||
|
duk__decode_context *dec_ctx;
|
||
|
duk_int_t magic;
|
||
|
|
||
|
dec_ctx = duk__get_textdecoder_context(thr);
|
||
|
magic = duk_get_current_magic(thr);
|
||
|
switch (magic) {
|
||
|
case 0:
|
||
|
/* Encoding is now fixed, so _Context lookup is only needed to
|
||
|
* validate the 'this' binding (TypeError if not TextDecoder-like).
|
||
|
*/
|
||
|
duk_push_literal(thr, "utf-8");
|
||
|
break;
|
||
|
case 1:
|
||
|
duk_push_boolean(thr, dec_ctx->fatal);
|
||
|
break;
|
||
|
default:
|
||
|
duk_push_boolean(thr, dec_ctx->ignore_bom);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
DUK_INTERNAL duk_ret_t duk_bi_textdecoder_prototype_decode(duk_hthread *thr) {
|
||
|
duk__decode_context *dec_ctx;
|
||
|
|
||
|
dec_ctx = duk__get_textdecoder_context(thr);
|
||
|
return duk__decode_helper(thr, dec_ctx);
|
||
|
}
|
||
|
#endif /* DUK_USE_ENCODING_BUILTINS */
|
||
|
|
||
|
/*
|
||
|
* Internal helper for Node.js Buffer
|
||
|
*/
|
||
|
|
||
|
/* Internal helper used for Node.js Buffer .toString(). Value stack convention
|
||
|
* is currently odd: it mimics TextDecoder .decode() so that argument must be at
|
||
|
* index 0, and decode options (not present for Buffer) at index 1. Return value
|
||
|
* is a Duktape/C function return value.
|
||
|
*/
|
||
|
DUK_INTERNAL duk_ret_t duk_textdecoder_decode_utf8_nodejs(duk_hthread *thr) {
|
||
|
duk__decode_context dec_ctx;
|
||
|
|
||
|
dec_ctx.fatal = 0; /* use replacement chars */
|
||
|
dec_ctx.ignore_bom = 1; /* ignore BOMs (matches Node.js Buffer .toString()) */
|
||
|
duk__utf8_decode_init(&dec_ctx);
|
||
|
|
||
|
return duk__decode_helper(thr, &dec_ctx);
|
||
|
}
|