From 8149bc3fa6e93cb083b165a21baa5ec07dd473dc Mon Sep 17 00:00:00 2001 From: nicm Date: Sun, 4 Jun 2017 09:02:36 +0000 Subject: Be more strict about escape sequences that rename windows or set titles: ignore any that not valid UTF-8 outright, and for good measure pass the result through our UTF-8-aware vis(3). --- utf8.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'utf8.c') diff --git a/utf8.c b/utf8.c index 3a315749..099541fe 100644 --- a/utf8.c +++ b/utf8.c @@ -207,6 +207,31 @@ utf8_stravis(char **dst, const char *src, int flag) return (len); } +/* Does this string contain anything that isn't valid UTF-8? */ +int +utf8_isvalid(const char *s) +{ + struct utf8_data ud; + const char *end; + enum utf8_state more; + size_t i; + + end = s + strlen(s); + while (s < end) { + if ((more = utf8_open(&ud, *s)) == UTF8_MORE) { + while (++s < end && more == UTF8_MORE) + more = utf8_append(&ud, *s); + if (more == UTF8_DONE) + continue; + return (0); + } + if (*s < 0x20 || *s > 0x7e) + return (0); + s++; + } + return (1); +} + /* * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free * the returned string. Anything not valid printable ASCII or UTF-8 is -- cgit