From 469a45e01245a57e6eaecc62269ced218a864121 Mon Sep 17 00:00:00 2001
From: Sharon Rosner
Date: Mon, 13 Oct 2025 11:56:31 +0200
Subject: [PATCH] escape_html: Avoid buffer allocation for strings with no escapable character

- Perform buffer allocation on first instance of escapable character.
- Instead of copying characters one at a time, copy unescaped segments using `memcpy`.
---
 ext/cgi/escape/escape.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c
index 4773186..7519257 100644
--- a/ext/cgi/escape/escape.c
+++ b/ext/cgi/escape/escape.c
@@ -47,31 +47,44 @@ optimized_escape_html(VALUE str)
 {
     VALUE escaped;
     VALUE vbuf;
-    char *buf = ALLOCV_N(char, vbuf, escaped_length(str));
+    char *buf = NULL;
     const char *cstr = RSTRING_PTR(str);
     const char *end = cstr + RSTRING_LEN(str);
 
-    char *dest = buf;
+    const char *segment_start = cstr;
+    char *dest = NULL;
     while (cstr < end) {
         const unsigned char c = *cstr++;
         uint8_t len = html_escape_table[c].len;
         if (len) {
+            size_t segment_len = cstr - segment_start - 1;
+            if (!buf) {
+                buf = ALLOCV_N(char, vbuf, escaped_length(str));
+                dest = buf;
+            }
+            if (segment_len) {
+                memcpy(dest, segment_start, segment_len);
+                dest += segment_len;
+            }
+            segment_start = cstr;
             memcpy(dest, html_escape_table[c].str, len);
             dest += len;
         }
-        else {
-            *dest++ = c;
-        }
     }
 
-    if (RSTRING_LEN(str) < (dest - buf)) {
+    if (buf) {
+        size_t segment_len = cstr - segment_start;
+        if (segment_len) {
+            memcpy(dest, segment_start, segment_len);
+            dest += segment_len;
+        }
         escaped = rb_str_new(buf, dest - buf);
         preserve_original_state(str, escaped);
+        ALLOCV_END(vbuf);
     }
     else {
         escaped = rb_str_dup(str);
     }
-    ALLOCV_END(vbuf);
 
     return escaped;
 }