From afc607ec2b8b0d2a8844cbe9b63412c1a94172b6 Mon Sep 17 00:00:00 2001 From: Squareys Date: Sat, 27 Jun 2026 15:18:45 +0200 Subject: [PATCH 1/5] Zend/micro_bench.php: add json_encode benchmarks Adds json_encode_obj() and json_encode_arr() benchmark cases using the same structure as the rest of the file. json_encode_obj uses a declared-property class (JsonObj) to exercise the properties_info_table path; json_encode_arr uses an associative array for comparison. --- Zend/micro_bench.php | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Zend/micro_bench.php b/Zend/micro_bench.php index 96ced487e1f2..f9f9809ed149 100644 --- a/Zend/micro_bench.php +++ b/Zend/micro_bench.php @@ -231,6 +231,30 @@ function ternary2($n) { } } +class JsonObj { + public function __construct( + public int $id, + public string $name, + public bool $active, + public string $category, + public int $score + ) {} +} + +function json_encode_obj($n) { + $obj = new JsonObj(1, 'test', true, 'category', 42); + for ($i = 0; $i < $n; $i++) { + json_encode($obj); + } +} + +function json_encode_arr($n) { + $arr = ['id' => 1, 'name' => 'test', 'active' => true, 'category' => 'cat', 'score' => 42]; + for ($i = 0; $i < $n; $i++) { + json_encode($arr); + } +} + /*****/ function empty_loop($n) { @@ -355,4 +379,8 @@ function total() $t = end_test($t, '$x = $f ? $f : $a', $overhead); ternary2(N); $t = end_test($t, '$x = $f ? 
$f : tmp', $overhead); +json_encode_obj(N/50); +$t = end_test($t, 'json_encode(obj)', $overhead); +json_encode_arr(N/50); +$t = end_test($t, 'json_encode(arr)', $overhead); total($t0, "Total"); From cca07d9f9bc571d87c4bb40c1797e06814b4afa5 Mon Sep 17 00:00:00 2001 From: Squareys Date: Sat, 27 Jun 2026 16:35:19 +0200 Subject: [PATCH 2/5] ext/json: single-alloc fast path for strings without special characters Add php_json_append_quoted() which reserves len+2 bytes in one smart_str_extend() call and writes '"', the string body, and '"' via raw pointer writes, replacing the previous three-call sequence (appendc, appendl, appendc) that each checked buffer capacity. Hoist the charmap out of the escape loop to file scope so the new fast-path scan, which runs before that loop, can use it. Callgrind on a mixed object+array workload (30k iterations each): baseline 2,687,712,329 instructions this diff 2,463,931,521 instructions (-8.3%) --- ext/json/json_encoder.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 424315eca7ec..22e12b0a4451 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -30,6 +30,18 @@ static const char digits[] = "0123456789abcdef"; +static const uint32_t charmap[8] = { + 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; + +static zend_always_inline void php_json_append_quoted(smart_str *buf, const char *s, size_t len) +{ + char *dst = smart_str_extend(buf, len + 2); + dst[0] = '"'; + memcpy(dst + 1, s, len); + dst[len + 1] = '"'; +} + static zend_always_inline bool php_json_check_stack_limit(void) { #ifdef ZEND_CHECK_STACK_LIMIT @@ -105,6 +117,7 @@ static inline void php_json_encode_double(smart_str *buf, double d, int options) } \ } while (0) + static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */ { bool encode_as_object = options & PHP_JSON_FORCE_OBJECT; 
@@ -374,6 +387,18 @@ zend_result php_json_escape_string( } } + /* fast path: no characters in the string require escaping allows us to single alloc and memcpy */ + { + size_t i = 0; + while (i < len && !ZEND_BIT_TEST(charmap, (unsigned char)s[i])) { + i++; + } + if (EXPECTED(i == len)) { + php_json_append_quoted(buf, s, len); + return SUCCESS; + } + } + checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0; /* pre-allocate for string length plus 2 quotes */ @@ -383,10 +408,6 @@ zend_result php_json_escape_string( pos = 0; do { - static const uint32_t charmap[8] = { - 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; - unsigned int us = (unsigned char)s[pos]; if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { pos++; From a77ac43adb91f691a1da57a00122dda905a68eb1 Mon Sep 17 00:00:00 2001 From: Squareys Date: Sat, 27 Jun 2026 16:35:43 +0200 Subject: [PATCH 3/5] ext/json: fast path for declared-property object keys PHP property names are valid identifiers and cannot contain any ASCII character that requires JSON escaping. Add php_json_encode_identifier() which replaces the full charmap scan with a single byte-range check (< 0x80): pure-ASCII identifiers take the fast path (one alloc + raw write via php_json_append_quoted), multibyte identifiers fall through to the same UTF-8 handling as php_json_escape_string. Use this in the properties_info_table path of php_json_encode_array, which is the hot path for objects with declared properties. Callgrind on a mixed object+array workload (30k iterations each): string fast path 2,463,931,521 instructions + this diff 2,153,092,353 instructions (-12.6% vs string fast path, -19.9% vs baseline) --- UPGRADING | 2 + ext/json/json_encoder.c | 98 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/UPGRADING b/UPGRADING index 15c1aad15db0..a46348a7a5fe 100644 --- a/UPGRADING +++ b/UPGRADING @@ -499,6 +499,8 @@ PHP 8.6 UPGRADE NOTES . 
Improve performance of encoding arrays and objects. . Improved performance of indentation generation in json_encode() when using PHP_JSON_PRETTY_PRINT. + . Improved performance of json_encode() for strings without special + characters and for objects with declared properties. - Phar: . Reduced temporary allocations when iterating Phar directories. diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 22e12b0a4451..5fc3449daaaf 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -117,6 +117,102 @@ static inline void php_json_encode_double(smart_str *buf, double d, int options) } \ } while (0) +/* encode a PHP identifier (property name) as a JSON string key. + * PHP identifiers cannot contain ASCII characters that require JSON escaping, + * so the fast path only scans for multi-byte UTF-8 sequences (bytes >= 0x80). */ +static zend_result php_json_encode_identifier( + smart_str *buf, const char *s, size_t len, + int options, php_json_encoder *encoder) +{ + /* fast path: no characters require escaping (PHP identifiers contain no JSON-special ASCII bytes) */ + { + size_t i = 0; + while (i < len && (unsigned char)s[i] < 0x80) { + i++; + } + if (EXPECTED(i == len)) { + php_json_append_quoted(buf, s, len); + return SUCCESS; + } + } + + size_t checkpoint = buf->s ? 
ZSTR_LEN(buf->s) : 0; + smart_str_alloc(buf, len + 2, 0); + smart_str_appendc(buf, '"'); + + size_t pos = 0; + do { + unsigned int us = (unsigned char)s[pos]; + if (EXPECTED(us < 0x80)) { + pos++; + len--; + if (len == 0) { + smart_str_appendl(buf, s, pos); + break; + } + } else { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } + zend_result status; + us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); + + if (UNEXPECTED(status != SUCCESS)) { + if (options & PHP_JSON_INVALID_UTF8_IGNORE) { + /* ignore invalid UTF-8 byte */ + } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { + if (options & PHP_JSON_UNESCAPED_UNICODE) { + smart_str_appendl(buf, "\xef\xbf\xbd", 3); + } else { + smart_str_appendl(buf, "\\ufffd", 6); + } + } else { + ZSTR_LEN(buf->s) = checkpoint; + encoder->error_code = PHP_JSON_ERROR_UTF8; + if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { + smart_str_appendl(buf, "null", 4); + } + return FAILURE; + } + } else if ((options & PHP_JSON_UNESCAPED_UNICODE) + && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) + || us < 0x2028 || us > 0x2029)) { + smart_str_appendl(buf, s, pos); + } else { + char *dst; + if (us >= 0x10000) { + unsigned int next_us; + us -= 0x10000; + next_us = (unsigned short)((us & 0x3ff) | 0xdc00); + us = (unsigned short)((us >> 10) | 0xd800); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; + us = next_us; + } + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; + } + s += pos; + len -= pos; + pos = 0; + } + } while (len); + + smart_str_appendc(buf, '"'); + return SUCCESS; +} static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* 
{{{ */ { @@ -175,7 +271,7 @@ static zend_result php_json_encode_array(smart_str *buf, zval *val, int options, php_json_pretty_print_char(buf, options, '\n'); php_json_pretty_print_indent(buf, options, encoder); - if (php_json_escape_string(buf, ZSTR_VAL(prop_info->name), ZSTR_LEN(prop_info->name), + if (php_json_encode_identifier(buf, ZSTR_VAL(prop_info->name), ZSTR_LEN(prop_info->name), options & ~PHP_JSON_NUMERIC_CHECK, encoder) == FAILURE && (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) && buf->s) { From faef2b3966c25ac01f672ac28a31344bb60876bd Mon Sep 17 00:00:00 2001 From: Squareys Date: Sat, 27 Jun 2026 20:25:33 +0200 Subject: [PATCH 4/5] ext/json: factor out \uXXXX escape writing into php_json_append_unicode_escape Reduces duplication in the surrogate pair paths in both php_json_escape_string and php_json_encode_identifier. Also moves charmap inside php_json_escape_string since it is only used there. --- ext/json/json_encoder.c | 89 +++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 53 deletions(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 5fc3449daaaf..756566874c2b 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -30,10 +30,6 @@ static const char digits[] = "0123456789abcdef"; -static const uint32_t charmap[8] = { - 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; - static zend_always_inline void php_json_append_quoted(smart_str *buf, const char *s, size_t len) { char *dst = smart_str_extend(buf, len + 2); @@ -42,6 +38,17 @@ static zend_always_inline void php_json_append_quoted(smart_str *buf, const char dst[len + 1] = '"'; } +static zend_always_inline void php_json_append_unicode_escape(smart_str *buf, unsigned int us) +{ + char *dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; +} + 
static zend_always_inline bool php_json_check_stack_limit(void) { #ifdef ZEND_CHECK_STACK_LIMIT @@ -124,23 +131,23 @@ static zend_result php_json_encode_identifier( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) { + size_t pos, checkpoint; + /* fast path: no characters require escaping (PHP identifiers contain no JSON-special ASCII bytes) */ - { - size_t i = 0; - while (i < len && (unsigned char)s[i] < 0x80) { - i++; - } - if (EXPECTED(i == len)) { - php_json_append_quoted(buf, s, len); - return SUCCESS; - } + pos = 0; + while (pos < len && (unsigned char)s[pos] < 0x80) { + pos++; + } + if (EXPECTED(pos == len)) { + php_json_append_quoted(buf, s, len); + return SUCCESS; } - size_t checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0; + checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0; smart_str_alloc(buf, len + 2, 0); smart_str_appendc(buf, '"'); - size_t pos = 0; + pos = 0; do { unsigned int us = (unsigned char)s[pos]; if (EXPECTED(us < 0x80)) { @@ -181,28 +188,15 @@ static zend_result php_json_encode_identifier( || us < 0x2028 || us > 0x2029)) { smart_str_appendl(buf, s, pos); } else { - char *dst; if (us >= 0x10000) { unsigned int next_us; us -= 0x10000; next_us = (unsigned short)((us & 0x3ff) | 0xdc00); us = (unsigned short)((us >> 10) | 0xd800); - dst = smart_str_extend(buf, 6); - dst[0] = '\\'; - dst[1] = 'u'; - dst[2] = digits[(us >> 12) & 0xf]; - dst[3] = digits[(us >> 8) & 0xf]; - dst[4] = digits[(us >> 4) & 0xf]; - dst[5] = digits[us & 0xf]; + php_json_append_unicode_escape(buf, us); us = next_us; } - dst = smart_str_extend(buf, 6); - dst[0] = '\\'; - dst[1] = 'u'; - dst[2] = digits[(us >> 12) & 0xf]; - dst[3] = digits[(us >> 8) & 0xf]; - dst[4] = digits[(us >> 4) & 0xf]; - dst[5] = digits[us & 0xf]; + php_json_append_unicode_escape(buf, us); } s += pos; len -= pos; @@ -459,6 +453,9 @@ zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ { + static const 
uint32_t charmap[8] = { + 0xffffffff, 0x500080c4, 0x10000000, 0x00000000, + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}; size_t pos, checkpoint; char *dst; @@ -484,15 +481,13 @@ zend_result php_json_escape_string( } /* fast path: no characters in the string require escaping allows us to single alloc and memcpy */ - { - size_t i = 0; - while (i < len && !ZEND_BIT_TEST(charmap, (unsigned char)s[i])) { - i++; - } - if (EXPECTED(i == len)) { - php_json_append_quoted(buf, s, len); - return SUCCESS; - } + pos = 0; + while (pos < len && !ZEND_BIT_TEST(charmap, (unsigned char)s[pos])) { + pos++; + } + if (EXPECTED(pos == len)) { + php_json_append_quoted(buf, s, len); + return SUCCESS; } checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0; @@ -558,22 +553,10 @@ zend_result php_json_escape_string( us -= 0x10000; next_us = (unsigned short)((us & 0x3ff) | 0xdc00); us = (unsigned short)((us >> 10) | 0xd800); - dst = smart_str_extend(buf, 6); - dst[0] = '\\'; - dst[1] = 'u'; - dst[2] = digits[(us >> 12) & 0xf]; - dst[3] = digits[(us >> 8) & 0xf]; - dst[4] = digits[(us >> 4) & 0xf]; - dst[5] = digits[us & 0xf]; + php_json_append_unicode_escape(buf, us); us = next_us; } - dst = smart_str_extend(buf, 6); - dst[0] = '\\'; - dst[1] = 'u'; - dst[2] = digits[(us >> 12) & 0xf]; - dst[3] = digits[(us >> 8) & 0xf]; - dst[4] = digits[(us >> 4) & 0xf]; - dst[5] = digits[us & 0xf]; + php_json_append_unicode_escape(buf, us); } s += pos; len -= pos; From 6e114dbbe96edfc58bb8b6c84235ae14b314f32f Mon Sep 17 00:00:00 2001 From: Squareys Date: Sat, 27 Jun 2026 20:31:53 +0200 Subject: [PATCH 5/5] ext/json: flush clean ASCII prefix before slow path in escape_string and encode_identifier When a string has a long clean ASCII prefix but ends with a character requiring escaping, the fast path scan already found the split point. Flush the prefix with a single smart_str_appendl before entering the slow loop instead of re-scanning it byte by byte. 
--- ext/json/json_encoder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 756566874c2b..d6020ee72ca7 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -147,6 +147,12 @@ static zend_result php_json_encode_identifier( smart_str_alloc(buf, len + 2, 0); smart_str_appendc(buf, '"'); + /* flush the clean ASCII prefix found by the fast path scan */ + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + len -= pos; + } pos = 0; do { unsigned int us = (unsigned char)s[pos]; @@ -496,6 +502,12 @@ zend_result php_json_escape_string( smart_str_alloc(buf, len+2, 0); smart_str_appendc(buf, '"'); + /* flush the clean ASCII prefix found by the fast path scan */ + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + len -= pos; + } pos = 0; do {