diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2023-11-30 20:35:25 +0000 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2023-11-30 20:35:25 +0000 |
commit | 1b7b916b7631ddf73c38e3a0070d64e4636cb2f3 (patch) | |
tree | cd08258054db80bb9a11b1061bb091c70b76926a /src/nvim/strings.c | |
parent | eaa89c11d0f8aefbb512de769c6c82f61a8baca3 (diff) | |
parent | 4a8bf24ac690004aedf5540fa440e788459e5e34 (diff) | |
download | rneovim-1b7b916b7631ddf73c38e3a0070d64e4636cb2f3.tar.gz rneovim-1b7b916b7631ddf73c38e3a0070d64e4636cb2f3.tar.bz2 rneovim-1b7b916b7631ddf73c38e3a0070d64e4636cb2f3.zip |
Merge remote-tracking branch 'upstream/master' into aucmd_textputpost
Diffstat (limited to 'src/nvim/strings.c')
-rw-r--r-- | src/nvim/strings.c | 1665 |
1 files changed, 1573 insertions, 92 deletions
diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 34b3c38103..a439d11818 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -1,6 +1,3 @@ -// This is an open source non-commercial project. Dear PVS-Studio, please check -// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com - #include <assert.h> #include <inttypes.h> #include <math.h> @@ -12,23 +9,56 @@ #include <string.h> #include "auto/config.h" -#include "nvim/ascii.h" -#include "nvim/assert.h" +#include "nvim/ascii_defs.h" +#include "nvim/assert_defs.h" #include "nvim/charset.h" #include "nvim/eval/encode.h" #include "nvim/eval/typval.h" #include "nvim/eval/typval_defs.h" #include "nvim/ex_docmd.h" +#include "nvim/garray.h" #include "nvim/gettext.h" -#include "nvim/macros.h" +#include "nvim/globals.h" +#include "nvim/macros_defs.h" #include "nvim/math.h" #include "nvim/mbyte.h" #include "nvim/memory.h" #include "nvim/message.h" #include "nvim/option.h" +#include "nvim/plines.h" #include "nvim/strings.h" -#include "nvim/types.h" -#include "nvim/vim.h" +#include "nvim/types_defs.h" +#include "nvim/vim_defs.h" + +static const char e_cannot_mix_positional_and_non_positional_str[] + = N_("E1500: Cannot mix positional and non-positional arguments: %s"); +static const char e_fmt_arg_nr_unused_str[] + = N_("E1501: format argument %d unused in $-style format: %s"); +static const char e_positional_num_field_spec_reused_str_str[] + = N_("E1502: Positional argument %d used as field width reused as different type: %s/%s"); +static const char e_positional_nr_out_of_bounds_str[] + = N_("E1503: Positional argument %d out of bounds: %s"); +static const char e_positional_arg_num_type_inconsistent_str_str[] + = N_("E1504: Positional argument %d type used inconsistently: %s/%s"); +static const char e_invalid_format_specifier_str[] + = N_("E1505: Invalid format specifier: %s"); +static const char e_aptypes_is_null_nr_str[] + = "E1507: Internal error: ap_types or ap_types[idx] is NULL: %d: 
%s"; + +static const char typename_unknown[] = N_("unknown"); +static const char typename_int[] = N_("int"); +static const char typename_longint[] = N_("long int"); +static const char typename_longlongint[] = N_("long long int"); +static const char typename_signedsizet[] = N_("signed size_t"); +static const char typename_unsignedint[] = N_("unsigned int"); +static const char typename_unsignedlongint[] = N_("unsigned long int"); +static const char typename_unsignedlonglongint[] = N_("unsigned long long int"); +static const char typename_sizet[] = N_("size_t"); +static const char typename_pointer[] = N_("pointer"); +static const char typename_percent[] = N_("percent"); +static const char typename_char[] = N_("char"); +static const char typename_string[] = N_("string"); +static const char typename_float[] = N_("float"); /// Copy up to `len` bytes of `string` into newly allocated memory and /// terminate with a NUL. The allocated memory always has size `len + 1`, even @@ -350,18 +380,6 @@ void del_trailing_spaces(char *ptr) } } -#if !defined(HAVE_STRNLEN) -size_t xstrnlen(const char *s, size_t n) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE -{ - const char *end = memchr(s, '\0', n); - if (end == NULL) { - return n; - } - return (size_t)(end - s); -} -#endif - #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) // Compare two strings, ignoring case, using current locale. // Doesn't work for multi-byte characters. @@ -371,7 +389,7 @@ int vim_stricmp(const char *s1, const char *s2) { int i; - for (;;) { + while (true) { i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2)); if (i != 0) { return i; // this character different @@ -411,6 +429,13 @@ int vim_strnicmp(const char *s1, const char *s2, size_t len) } #endif +/// Case-insensitive `strequal`. 
+bool striequal(const char *a, const char *b) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (a == NULL && b == NULL) || (a && b && STRICMP(a, b) == 0); +} + /// strchr() version which handles multibyte strings /// /// @param[in] string String to search in. @@ -455,10 +480,8 @@ void sort_strings(char **files, int count) bool has_non_ascii(const char *s) FUNC_ATTR_PURE { - const char *p; - if (s != NULL) { - for (p = s; *p != NUL; p++) { + for (const char *p = s; *p != NUL; p++) { if ((uint8_t)(*p) >= 128) { return true; } @@ -498,13 +521,10 @@ static const char *const e_printf = /// Get number argument from idxp entry in tvs /// -/// Will give an error message for VimL entry with invalid type or for -/// insufficient entries. +/// Will give an error message for Vimscript entry with invalid type or for insufficient entries. /// -/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN -/// value. -/// @param[in,out] idxp Index in a list. Will be incremented. Indexing starts -/// at 1. +/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value. +/// @param[in,out] idxp Index in a list. Will be incremented. Indexing starts at 1. /// /// @return Number value or 0 in case of error. static varnumber_T tv_nr(typval_T *tvs, int *idxp) @@ -528,10 +548,10 @@ static varnumber_T tv_nr(typval_T *tvs, int *idxp) /// Get string argument from idxp entry in tvs /// -/// Will give an error message for VimL entry with invalid type or for +/// Will give an error message for Vimscript entry with invalid type or for /// insufficient entries. /// -/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN +/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN /// value. /// @param[in,out] idxp Index in a list. Will be incremented. 
/// @param[out] tofree If the idxp entry in tvs is not a String or a Number, @@ -562,7 +582,7 @@ static const char *tv_str(typval_T *tvs, int *idxp, char **const tofree) /// Get pointer argument from the next entry in tvs /// -/// Will give an error message for VimL entry with invalid type or for +/// Will give an error message for Vimscript entry with invalid type or for /// insufficient entries. /// /// @param[in] tvs List of typval_T values. @@ -573,7 +593,7 @@ static const void *tv_ptr(const typval_T *const tvs, int *const idxp) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { #define OFF(attr) offsetof(union typval_vval_union, attr) - STATIC_ASSERT(OFF(v_string) == OFF(v_list) // -V568 + STATIC_ASSERT(OFF(v_string) == OFF(v_list) && OFF(v_string) == OFF(v_dict) && OFF(v_string) == OFF(v_partial) && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_list) @@ -593,11 +613,10 @@ static const void *tv_ptr(const typval_T *const tvs, int *const idxp) /// Get float argument from idxp entry in tvs /// -/// Will give an error message for VimL entry with invalid type or for +/// Will give an error message for Vimscript entry with invalid type or for /// insufficient entries. /// -/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN -/// value. +/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value. /// @param[in,out] idxp Index in a list. Will be incremented. /// /// @return Floating-point value or zero in case of error. 
@@ -719,23 +738,608 @@ int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap) return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL); } +enum { + TYPE_UNKNOWN = -1, + TYPE_INT, + TYPE_LONGINT, + TYPE_LONGLONGINT, + TYPE_SIGNEDSIZET, + TYPE_UNSIGNEDINT, + TYPE_UNSIGNEDLONGINT, + TYPE_UNSIGNEDLONGLONGINT, + TYPE_SIZET, + TYPE_POINTER, + TYPE_PERCENT, + TYPE_CHAR, + TYPE_STRING, + TYPE_FLOAT, +}; + +/// Types that can be used in a format string +static int format_typeof(const char *type) + FUNC_ATTR_NONNULL_ALL +{ + // allowed values: \0, h, l, L + char length_modifier = '\0'; + + // current conversion specifier character + char fmt_spec = '\0'; + + // parse 'h', 'l', 'll' and 'z' length modifiers + if (*type == 'h' || *type == 'l' || *type == 'z') { + length_modifier = *type; + type++; + if (length_modifier == 'l' && *type == 'l') { + // double l = long long + length_modifier = 'L'; + type++; + } + } + fmt_spec = *type; + + // common synonyms: + switch (fmt_spec) { + case 'i': + fmt_spec = 'd'; break; + case '*': + fmt_spec = 'd'; length_modifier = 'h'; break; + case 'D': + fmt_spec = 'd'; length_modifier = 'l'; break; + case 'U': + fmt_spec = 'u'; length_modifier = 'l'; break; + case 'O': + fmt_spec = 'o'; length_modifier = 'l'; break; + default: + break; + } + + // get parameter value, do initial processing + switch (fmt_spec) { + // '%' and 'c' behave similar to 's' regarding flags and field + // widths + case '%': + return TYPE_PERCENT; + + case 'c': + return TYPE_CHAR; + + case 's': + case 'S': + return TYPE_STRING; + + case 'd': + case 'u': + case 'b': + case 'B': + case 'o': + case 'x': + case 'X': + case 'p': + // NOTE: the u, b, o, x, X and p conversion specifiers + // imply the value is unsigned; d implies a signed + // value + + // 0 if numeric argument is zero (or if pointer is + // NULL for 'p'), +1 if greater than zero (or nonzero + // for unsigned arguments), -1 if negative (unsigned + // argument is never negative) + + if (fmt_spec == 
'p') { + return TYPE_POINTER; + } else if (fmt_spec == 'b' || fmt_spec == 'B') { + return TYPE_UNSIGNEDLONGLONGINT; + } else if (fmt_spec == 'd') { + // signed + switch (length_modifier) { + case '\0': + case 'h': + // char and short arguments are passed as int. + return TYPE_INT; + case 'l': + return TYPE_LONGINT; + case 'L': + return TYPE_LONGLONGINT; + case 'z': + return TYPE_SIGNEDSIZET; + } + } else { + // unsigned + switch (length_modifier) { + case '\0': + case 'h': + return TYPE_UNSIGNEDINT; + case 'l': + return TYPE_UNSIGNEDLONGINT; + case 'L': + return TYPE_UNSIGNEDLONGLONGINT; + case 'z': + return TYPE_SIZET; + } + } + break; + + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + return TYPE_FLOAT; + } + + return TYPE_UNKNOWN; +} + +static char *format_typename(const char *type) + FUNC_ATTR_NONNULL_ALL +{ + switch (format_typeof(type)) { + case TYPE_INT: + return _(typename_int); + case TYPE_LONGINT: + return _(typename_longint); + case TYPE_LONGLONGINT: + return _(typename_longlongint); + case TYPE_UNSIGNEDINT: + return _(typename_unsignedint); + case TYPE_SIGNEDSIZET: + return _(typename_signedsizet); + case TYPE_UNSIGNEDLONGINT: + return _(typename_unsignedlongint); + case TYPE_UNSIGNEDLONGLONGINT: + return _(typename_unsignedlonglongint); + case TYPE_SIZET: + return _(typename_sizet); + case TYPE_POINTER: + return _(typename_pointer); + case TYPE_PERCENT: + return _(typename_percent); + case TYPE_CHAR: + return _(typename_char); + case TYPE_STRING: + return _(typename_string); + case TYPE_FLOAT: + return _(typename_float); + } + + return _(typename_unknown); +} + +static int adjust_types(const char ***ap_types, int arg, int *num_posarg, const char *type) + FUNC_ATTR_NONNULL_ALL +{ + if (*ap_types == NULL || *num_posarg < arg) { + const char **new_types = *ap_types == NULL + ? 
xcalloc(sizeof(const char *), (size_t)arg) + : xrealloc(*ap_types, (size_t)arg * sizeof(const char *)); + + for (int idx = *num_posarg; idx < arg; idx++) { + new_types[idx] = NULL; + } + + *ap_types = new_types; + *num_posarg = arg; + } + + if ((*ap_types)[arg - 1] != NULL) { + if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*') { + const char *pt = type; + if (pt[0] == '*') { + pt = (*ap_types)[arg - 1]; + } + + if (pt[0] != '*') { + switch (pt[0]) { + case 'd': + case 'i': + break; + default: + semsg(_(e_positional_num_field_spec_reused_str_str), arg, + format_typename((*ap_types)[arg - 1]), format_typename(type)); + return FAIL; + } + } + } else { + if (format_typeof(type) != format_typeof((*ap_types)[arg - 1])) { + semsg(_(e_positional_arg_num_type_inconsistent_str_str), arg, + format_typename(type), format_typename((*ap_types)[arg - 1])); + return FAIL; + } + } + } + + (*ap_types)[arg - 1] = type; + + return OK; +} + +static int parse_fmt_types(const char ***ap_types, int *num_posarg, const char *fmt, typval_T *tvs) + FUNC_ATTR_NONNULL_ARG(1, 2) +{ + const char *p = fmt; + const char *arg = NULL; + + int any_pos = 0; + int any_arg = 0; + +#define CHECK_POS_ARG \ + do { \ + if (any_pos && any_arg) { \ + semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); \ + goto error; \ + } \ + } while (0); + + if (p == NULL) { + return OK; + } + + while (*p != NUL) { + if (*p != '%') { + char *q = strchr(p + 1, '%'); + size_t n = (q == NULL) ? 
strlen(p) : (size_t)(q - p); + + p += n; + } else { + // allowed values: \0, h, l, L + char length_modifier = '\0'; + + // variable for positional arg + int pos_arg = -1; + + p++; // skip '%' + + // First check to see if we find a positional + // argument specifier + const char *ptype = p; + + while (ascii_isdigit(*ptype)) { + ptype++; + } + + if (*ptype == '$') { + if (*p == '0') { + // 0 flag at the wrong place + semsg(_(e_invalid_format_specifier_str), fmt); + goto error; + } + + // Positional argument + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + pos_arg = (int)uj; + + any_pos = 1; + CHECK_POS_ARG; + + p++; + } + + // parse flags + while (*p == '0' || *p == '-' || *p == '+' || *p == ' ' + || *p == '#' || *p == '\'') { + switch (*p) { + case '0': + break; + case '-': + break; + case '+': + break; + case ' ': // If both the ' ' and '+' flags appear, the ' ' + // flag should be ignored + break; + case '#': + break; + case '\'': + break; + } + p++; + } + // If the '0' and '-' flags both appear, the '0' flag should be + // ignored. 
+ + // parse field width + if (*(arg = p) == '*') { + p++; + + if (ascii_isdigit((int)(*p))) { + // Positional argument field width + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + + if (*p != '$') { + semsg(_(e_invalid_format_specifier_str), fmt); + goto error; + } else { + p++; + any_pos = 1; + CHECK_POS_ARG; + + if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) { + goto error; + } + } + } else { + any_arg = 1; + CHECK_POS_ARG; + } + } else if (ascii_isdigit((int)(*p))) { + // size_t could be wider than unsigned int; make sure we treat + // argument like common implementations do + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + + if (*p == '$') { + semsg(_(e_invalid_format_specifier_str), fmt); + goto error; + } + } + + // parse precision + if (*p == '.') { + p++; + + if (*(arg = p) == '*') { + p++; + + if (ascii_isdigit((int)(*p))) { + // Parse precision + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + + if (*p == '$') { + any_pos = 1; + CHECK_POS_ARG; + + p++; + + if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) { + goto error; + } + } else { + semsg(_(e_invalid_format_specifier_str), fmt); + goto error; + } + } else { + any_arg = 1; + CHECK_POS_ARG; + } + } else if (ascii_isdigit((int)(*p))) { + // size_t could be wider than unsigned int; make sure we + // treat argument like common implementations do + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + + if (*p == '$') { + semsg(_(e_invalid_format_specifier_str), fmt); + goto error; + } + } + } + + if (pos_arg != -1) { + any_pos = 1; + CHECK_POS_ARG; + + ptype = p; + } + + // parse 'h', 'l', 'll' and 'z' length modifiers + if (*p == 'h' || *p == 'l' || *p == 'z') { + length_modifier = 
*p; + p++; + if (length_modifier == 'l' && *p == 'l') { + // double l = long long + // length_modifier = 'L'; + p++; + } + } + + switch (*p) { + // Check for known format specifiers. % is special! + case 'i': + case '*': + case 'd': + case 'u': + case 'o': + case 'D': + case 'U': + case 'O': + case 'x': + case 'X': + case 'b': + case 'B': + case 'c': + case 's': + case 'S': + case 'p': + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + if (pos_arg != -1) { + if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL) { + goto error; + } + } else { + any_arg = 1; + CHECK_POS_ARG; + } + break; + + default: + if (pos_arg != -1) { + semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); + goto error; + } + } + + if (*p != NUL) { + p++; // step over the just processed conversion specifier + } + } + } + + for (int arg_idx = 0; arg_idx < *num_posarg; arg_idx++) { + if ((*ap_types)[arg_idx] == NULL) { + semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt); + goto error; + } + + if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN) { + semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt); + goto error; + } + } + + return OK; + +error: + xfree(*ap_types); + *ap_types = NULL; + *num_posarg = 0; + return FAIL; +} + +static void skip_to_arg(const char **ap_types, va_list ap_start, va_list *ap, int *arg_idx, + int *arg_cur, const char *fmt) + FUNC_ATTR_NONNULL_ARG(3, 4, 5) +{ + int arg_min = 0; + + if (*arg_cur + 1 == *arg_idx) { + (*arg_cur)++; + (*arg_idx)++; + return; + } + + if (*arg_cur >= *arg_idx) { + // Reset ap to ap_start and skip arg_idx - 1 types + va_end(*ap); + va_copy(*ap, ap_start); + } else { + // Skip over any we should skip + arg_min = *arg_cur; + } + + for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; (*arg_cur)++) { + if (ap_types == NULL || ap_types[*arg_cur] == NULL) { + siemsg(e_aptypes_is_null_nr_str, fmt, *arg_cur); + return; + } + + const char *p = ap_types[*arg_cur]; + + int fmt_type = format_typeof(p); + + 
// get parameter value, do initial processing + switch (fmt_type) { + case TYPE_PERCENT: + case TYPE_UNKNOWN: + break; + + case TYPE_CHAR: + va_arg(*ap, int); + break; + + case TYPE_STRING: + va_arg(*ap, const char *); + break; + + case TYPE_POINTER: + va_arg(*ap, void *); + break; + + case TYPE_INT: + va_arg(*ap, int); + break; + + case TYPE_LONGINT: + va_arg(*ap, long); + break; + + case TYPE_LONGLONGINT: + va_arg(*ap, long long); // NOLINT(runtime/int) + break; + + case TYPE_SIGNEDSIZET: // implementation-defined, usually ptrdiff_t + va_arg(*ap, ptrdiff_t); + break; + + case TYPE_UNSIGNEDINT: + va_arg(*ap, unsigned); + break; + + case TYPE_UNSIGNEDLONGINT: + va_arg(*ap, unsigned long); + break; + + case TYPE_UNSIGNEDLONGLONGINT: + va_arg(*ap, unsigned long long); // NOLINT(runtime/int) + break; + + case TYPE_SIZET: + va_arg(*ap, size_t); + break; + + case TYPE_FLOAT: + va_arg(*ap, double); + break; + } + } + + // Because we know that after we return from this call, + // a va_arg() call is made, we can pre-emptively + // increment the current argument index. + (*arg_cur)++; + (*arg_idx)++; +} + /// Write formatted value to the string /// /// @param[out] str String to write to. /// @param[in] str_m String length. /// @param[in] fmt String format. /// @param[in] ap Values that should be formatted. Ignored if tvs is not NULL. -/// @param[in] tvs Values that should be formatted, for printf() VimL +/// @param[in] tvs Values that should be formatted, for printf() Vimscript /// function. Must be NULL in other cases. /// /// @return Number of bytes excluding NUL byte that would be written to the /// string if str_m was greater or equal to the return value. 
-int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, typval_T *const tvs) +int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap_start, + typval_T *const tvs) { size_t str_l = 0; bool str_avail = str_l < str_m; const char *p = fmt; + int arg_cur = 0; + int num_posarg = 0; int arg_idx = 1; + va_list ap; + const char **ap_types = NULL; + + if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL) { + return 0; + } + + va_copy(ap, ap_start); if (!p) { p = ""; @@ -791,8 +1395,31 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t // buffer for 's' and 'S' specs char *tofree = NULL; + // variable for positional arg + int pos_arg = -1; + p++; // skip '%' + // First check to see if we find a positional + // argument specifier + const char *ptype = p; + + while (ascii_isdigit(*ptype)) { + ptype++; + } + + if (*ptype == '$') { + // Positional argument + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + pos_arg = (int)uj; + + p++; + } + // parse flags while (true) { switch (*p) { @@ -819,7 +1446,25 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t // parse field width if (*p == '*') { p++; - const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int); + + if (ascii_isdigit((int)(*p))) { + // Positional argument field width + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + arg_idx = (int)uj; + + p++; + } + + const int j = (tvs + ? 
(int)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, int))); + if (j >= 0) { min_field_width = (size_t)j; } else { @@ -829,10 +1474,10 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t } else if (ascii_isdigit((int)(*p))) { // size_t could be wider than unsigned int; make sure we treat // argument like common implementations do - unsigned int uj = (unsigned)(*p++ - '0'); + unsigned uj = (unsigned)(*p++ - '0'); while (ascii_isdigit((int)(*p))) { - uj = 10 * uj + (unsigned int)(*p++ - '0'); + uj = 10 * uj + (unsigned)(*p++ - '0'); } min_field_width = uj; } @@ -841,24 +1486,43 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t if (*p == '.') { p++; precision_specified = 1; - if (*p == '*') { - const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int); + + if (ascii_isdigit((int)(*p))) { + // size_t could be wider than unsigned int; make sure we + // treat argument like common implementations do + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + precision = uj; + } else if (*p == '*') { p++; + + if (ascii_isdigit((int)(*p))) { + // positional argument + unsigned uj = (unsigned)(*p++ - '0'); + + while (ascii_isdigit((int)(*p))) { + uj = 10 * uj + (unsigned)(*p++ - '0'); + } + arg_idx = (int)uj; + + p++; + } + + const int j = (tvs + ? 
(int)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, int))); + if (j >= 0) { precision = (size_t)j; } else { precision_specified = 0; precision = 0; } - } else if (ascii_isdigit((int)(*p))) { - // size_t could be wider than unsigned int; make sure we - // treat argument like common implementations do - unsigned int uj = (unsigned)(*p++ - '0'); - - while (ascii_isdigit((int)(*p))) { - uj = 10 * uj + (unsigned int)(*p++ - '0'); - } - precision = uj; } } @@ -866,8 +1530,9 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t if (*p == 'h' || *p == 'l' || *p == 'z') { length_modifier = *p; p++; - if (length_modifier == 'l' && *p == 'l') { // ll, encoded as 2 - length_modifier = '2'; + if (length_modifier == 'l' && *p == 'l') { + // double l = long long + length_modifier = 'L'; p++; } } @@ -897,10 +1562,14 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t case 'x': case 'X': if (tvs && length_modifier == '\0') { - length_modifier = '2'; + length_modifier = 'L'; } } + if (pos_arg != -1) { + arg_idx = pos_arg; + } + // get parameter value, do initial processing switch (fmt_spec) { // '%' and 'c' behave similar to 's' regarding flags and field widths @@ -915,7 +1584,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t break; case 'c': { - const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int); + const int j = (tvs + ? (int)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, int))); + // standard demands unsigned char uchar_arg = (unsigned char)j; str_arg = (char *)&uchar_arg; @@ -924,8 +1598,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t case 's': case 'S': - str_arg = tvs ? tv_str(tvs, &arg_idx, &tofree) - : va_arg(ap, const char *); + str_arg = (tvs + ? 
tv_str(tvs, &arg_idx, &tofree) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, const char *))); + if (!str_arg) { str_arg = "[NULL]"; str_arg_l = 6; @@ -946,10 +1624,10 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t - str_arg); } if (fmt_spec == 'S') { - char *p1; + const char *p1; size_t i; - for (i = 0, p1 = (char *)str_arg; *p1; p1 += utfc_ptr2len(p1)) { + for (i = 0, p1 = str_arg; *p1; p1 += utfc_ptr2len(p1)) { size_t cell = (size_t)utf_ptr2cells(p1); if (precision_specified && i + cell > precision) { break; @@ -992,7 +1670,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t const void *ptr_arg = NULL; if (fmt_spec == 'p') { - ptr_arg = tvs ? tv_ptr(tvs, &arg_idx) : va_arg(ap, void *); + ptr_arg = (tvs + ? tv_ptr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, void *))); + if (ptr_arg) { arg_sign = 1; } @@ -1000,23 +1683,41 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t // signed switch (length_modifier) { case '\0': - arg = (int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, int)); + arg = (tvs + ? (int)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, int))); break; case 'h': // char and short arguments are passed as int16_t - arg = (int16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, int)); + arg = (int16_t) + (tvs + ? (int)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, int))); break; case 'l': - arg = (tvs ? (long)tv_nr(tvs, &arg_idx) : va_arg(ap, long)); + arg = (tvs + ? (long)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, long))); break; - case '2': - arg = ( - tvs - ? (long long)tv_nr(tvs, &arg_idx) // NOLINT (runtime/int) - : va_arg(ap, long long)); // NOLINT (runtime/int) + case 'L': + arg = (tvs + ? 
(long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, long long))); // NOLINT(runtime/int) break; - case 'z': - arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t)); + case 'z': // implementation-defined, usually ptrdiff_t + arg = (tvs + ? (ptrdiff_t)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, ptrdiff_t))); break; } if (arg > 0) { @@ -1028,23 +1729,40 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t // unsigned switch (length_modifier) { case '\0': - uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int)); + uarg = (tvs + ? (unsigned)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, unsigned))); break; case 'h': - uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int)); + uarg = (uint16_t) + (tvs + ? (unsigned)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, unsigned))); break; case 'l': - uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long)); + uarg = (tvs + ? (unsigned long)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, unsigned long))); break; - case '2': - uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int) - tvs - ? ((unsigned long long) // NOLINT (runtime/int) - tv_nr(tvs, &arg_idx)) - : va_arg(ap, unsigned long long)); // NOLINT (runtime/int) + case 'L': + uarg = (tvs + ? (unsigned long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, unsigned long long))); // NOLINT(runtime/int) break; case 'z': - uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t)); + uarg = (tvs + ? 
(size_t)tv_nr(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, size_t))); break; } arg_sign = (uarg != 0); @@ -1179,7 +1897,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t char format[40]; int remove_trailing_zeroes = false; - double f = tvs ? tv_float(tvs, &arg_idx) : va_arg(ap, double); + double f = (tvs + ? tv_float(tvs, &arg_idx) + : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, + &arg_cur, fmt), + va_arg(ap, double))); + double abs_f = f < 0 ? -f : f; if (fmt_spec == 'g' || fmt_spec == 'G') { @@ -1234,7 +1957,6 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t assert(str_arg_l < sizeof(tmp)); if (remove_trailing_zeroes) { - int i; char *tp; // using %g or %G: remove superfluous zeroes @@ -1249,7 +1971,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t STRMOVE(tp + 1, tp + 2); str_arg_l--; } - i = (tp[1] == '-') ? 2 : 1; + int i = (tp[1] == '-') ? 2 : 1; while (tp[i] == '0') { // change "1.0e07" to "1.0e7" STRMOVE(tp + i, tp + i + 1); @@ -1397,10 +2119,14 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0'; } - if (tvs && tvs[arg_idx - 1].v_type != VAR_UNKNOWN) { + if (tvs != NULL + && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN) { emsg(_("E767: Too many arguments to printf()")); } + xfree(ap_types); + va_end(ap); + // return the number of characters formatted (excluding trailing nul // character); that is, the number of characters that would have been // written to the buffer if it were large enough. @@ -1441,20 +2167,17 @@ int kv_do_printf(StringBuilder *str, const char *fmt, ...) /// /// @return the allocated string. char *reverse_text(char *s) - FUNC_ATTR_NONNULL_RET + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET { - // Reverse the pattern. 
size_t len = strlen(s); char *rev = xmalloc(len + 1); - size_t rev_i = len; - for (size_t s_i = 0; s_i < len; s_i++) { + for (size_t s_i = 0, rev_i = len; s_i < len; s_i++) { const int mb_len = utfc_ptr2len(s + s_i); rev_i -= (size_t)mb_len; memmove(rev + rev_i, s + s_i, (size_t)mb_len); s_i += (size_t)mb_len - 1; } rev[len] = NUL; - return rev; } @@ -1468,7 +2191,7 @@ char *reverse_text(char *s) /// @return [allocated] Copy of the string. char *strrep(const char *src, const char *what, const char *rep) { - char *pos = (char *)src; + const char *pos = src; size_t whatlen = strlen(what); // Count occurrences @@ -1499,3 +2222,761 @@ char *strrep(const char *src, const char *what, const char *rep) return ret; } + +/// Implementation of "byteidx()" and "byteidxcomp()" functions +static void byteidx_common(typval_T *argvars, typval_T *rettv, int comp) +{ + rettv->vval.v_number = -1; + + const char *const str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); + if (str == NULL || idx < 0) { + return; + } + + varnumber_T utf16idx = false; + if (argvars[2].v_type != VAR_UNKNOWN) { + bool error = false; + utf16idx = tv_get_bool_chk(&argvars[2], &error); + if (error) { + return; + } + if (utf16idx < 0 || utf16idx > 1) { + semsg(_(e_using_number_as_bool_nr), utf16idx); + return; + } + } + + int (*ptr2len)(const char *); + if (comp) { + ptr2len = utf_ptr2len; + } else { + ptr2len = utfc_ptr2len; + } + + const char *t = str; + for (; idx > 0; idx--) { + if (*t == NUL) { // EOL reached. + return; + } + if (utf16idx) { + const int clen = ptr2len(t); + const int c = (clen > 1) ? 
utf_ptr2char(t) : *t; + if (c > 0xFFFF) { + idx--; + } + } + if (idx > 0) { + t += ptr2len(t); + } + } + rettv->vval.v_number = (varnumber_T)(t - str); +} + +/// "byteidx()" function +void f_byteidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + byteidx_common(argvars, rettv, false); +} + +/// "byteidxcomp()" function +void f_byteidxcomp(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + byteidx_common(argvars, rettv, true); +} + +/// "charidx()" function +void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = -1; + + if (tv_check_for_string_arg(argvars, 0) == FAIL + || tv_check_for_number_arg(argvars, 1) == FAIL + || tv_check_for_opt_bool_arg(argvars, 2) == FAIL + || (argvars[2].v_type != VAR_UNKNOWN + && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) { + return; + } + + const char *const str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); + if (str == NULL || idx < 0) { + return; + } + + varnumber_T countcc = false; + varnumber_T utf16idx = false; + if (argvars[2].v_type != VAR_UNKNOWN) { + countcc = tv_get_bool(&argvars[2]); + if (argvars[3].v_type != VAR_UNKNOWN) { + utf16idx = tv_get_bool(&argvars[3]); + } + } + + int (*ptr2len)(const char *); + if (countcc) { + ptr2len = utf_ptr2len; + } else { + ptr2len = utfc_ptr2len; + } + + const char *p; + int len; + for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) { + if (*p == NUL) { + // If the index is exactly the number of bytes or utf-16 code units + // in the string then return the length of the string in characters. + if (utf16idx ? (idx == 0) : (p == (str + idx))) { + rettv->vval.v_number = len; + } + return; + } + if (utf16idx) { + idx--; + const int clen = ptr2len(p); + const int c = (clen > 1) ? utf_ptr2char(p) : *p; + if (c > 0xFFFF) { + idx--; + } + } + p += ptr2len(p); + } + + rettv->vval.v_number = len > 0 ? 
len - 1 : 0; +} + +/// "str2list()" function +void f_str2list(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + tv_list_alloc_ret(rettv, kListLenUnknown); + const char *p = tv_get_string(&argvars[0]); + + for (; *p != NUL; p += utf_ptr2len(p)) { + tv_list_append_number(rettv->vval.v_list, utf_ptr2char(p)); + } +} + +/// "str2nr()" function +void f_str2nr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + int base = 10; + int what = 0; + + if (argvars[1].v_type != VAR_UNKNOWN) { + base = (int)tv_get_number(&argvars[1]); + if (base != 2 && base != 8 && base != 10 && base != 16) { + emsg(_(e_invarg)); + return; + } + if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) { + what |= STR2NR_QUOTE; + } + } + + char *p = skipwhite(tv_get_string(&argvars[0])); + bool isneg = (*p == '-'); + if (*p == '+' || *p == '-') { + p = skipwhite(p + 1); + } + switch (base) { + case 2: + what |= STR2NR_BIN | STR2NR_FORCE; + break; + case 8: + what |= STR2NR_OCT | STR2NR_OOCT | STR2NR_FORCE; + break; + case 16: + what |= STR2NR_HEX | STR2NR_FORCE; + break; + } + varnumber_T n; + vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, false, NULL); + // Text after the number is silently ignored. 
+ if (isneg) { + rettv->vval.v_number = -n; + } else { + rettv->vval.v_number = n; + } +} + +/// "strgetchar()" function +void f_strgetchar(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = -1; + + const char *const str = tv_get_string_chk(&argvars[0]); + if (str == NULL) { + return; + } + bool error = false; + varnumber_T charidx = tv_get_number_chk(&argvars[1], &error); + if (error) { + return; + } + + const size_t len = strlen(str); + size_t byteidx = 0; + + while (charidx >= 0 && byteidx < len) { + if (charidx == 0) { + rettv->vval.v_number = utf_ptr2char(str + byteidx); + break; + } + charidx--; + byteidx += (size_t)utf_ptr2len(str + byteidx); + } +} + +/// "stridx()" function +void f_stridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = -1; + + char buf[NUMBUFLEN]; + const char *const needle = tv_get_string_chk(&argvars[1]); + const char *haystack = tv_get_string_buf_chk(&argvars[0], buf); + const char *const haystack_start = haystack; + if (needle == NULL || haystack == NULL) { + return; // Type error; errmsg already given. 
+ } + + if (argvars[2].v_type != VAR_UNKNOWN) { + bool error = false; + + const ptrdiff_t start_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], + &error); + if (error || start_idx >= (ptrdiff_t)strlen(haystack)) { + return; + } + if (start_idx >= 0) { + haystack += start_idx; + } + } + + const char *pos = strstr(haystack, needle); + if (pos != NULL) { + rettv->vval.v_number = (varnumber_T)(pos - haystack_start); + } +} + +/// "string()" function +void f_string(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = encode_tv2string(&argvars[0], NULL); +} + +/// "strlen()" function +void f_strlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = (varnumber_T)strlen(tv_get_string(&argvars[0])); +} + +static void strchar_common(typval_T *argvars, typval_T *rettv, bool skipcc) +{ + const char *s = tv_get_string(&argvars[0]); + varnumber_T len = 0; + int (*func_mb_ptr2char_adv)(const char **pp); + + func_mb_ptr2char_adv = skipcc ? 
mb_ptr2char_adv : mb_cptr2char_adv; + while (*s != NUL) { + func_mb_ptr2char_adv(&s); + len++; + } + rettv->vval.v_number = len; +} + +/// "strcharlen()" function +void f_strcharlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + strchar_common(argvars, rettv, true); +} + +/// "strchars()" function +void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + varnumber_T skipcc = false; + + if (argvars[1].v_type != VAR_UNKNOWN) { + bool error = false; + skipcc = tv_get_bool_chk(&argvars[1], &error); + if (error) { + return; + } + if (skipcc < 0 || skipcc > 1) { + semsg(_(e_using_number_as_bool_nr), skipcc); + return; + } + } + + strchar_common(argvars, rettv, skipcc); +} + +/// "strutf16len()" function +void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = -1; + + if (tv_check_for_string_arg(argvars, 0) == FAIL + || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) { + return; + } + + varnumber_T countcc = false; + if (argvars[1].v_type != VAR_UNKNOWN) { + countcc = tv_get_bool(&argvars[1]); + } + + const char *s = tv_get_string(&argvars[0]); + varnumber_T len = 0; + int (*func_mb_ptr2char_adv)(const char **pp); + + func_mb_ptr2char_adv = countcc ? 
mb_cptr2char_adv : mb_ptr2char_adv; + while (*s != NUL) { + const int ch = func_mb_ptr2char_adv(&s); + if (ch > 0xFFFF) { + len++; + } + len++; + } + rettv->vval.v_number = len; +} + +/// "strdisplaywidth()" function +void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + const char *const s = tv_get_string(&argvars[0]); + int col = 0; + + if (argvars[1].v_type != VAR_UNKNOWN) { + col = (int)tv_get_number(&argvars[1]); + } + + rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, (char *)s) - col); +} + +/// "strwidth()" function +void f_strwidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + const char *const s = tv_get_string(&argvars[0]); + + rettv->vval.v_number = (varnumber_T)mb_string2cells(s); +} + +/// "strcharpart()" function +void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + const char *const p = tv_get_string(&argvars[0]); + const size_t slen = strlen(p); + + int nbyte = 0; + varnumber_T skipcc = false; + bool error = false; + varnumber_T nchar = tv_get_number_chk(&argvars[1], &error); + if (!error) { + if (argvars[2].v_type != VAR_UNKNOWN + && argvars[3].v_type != VAR_UNKNOWN) { + skipcc = tv_get_bool_chk(&argvars[3], &error); + if (error) { + return; + } + if (skipcc < 0 || skipcc > 1) { + semsg(_(e_using_number_as_bool_nr), skipcc); + return; + } + } + + if (nchar > 0) { + while (nchar > 0 && (size_t)nbyte < slen) { + if (skipcc) { + nbyte += utfc_ptr2len(p + nbyte); + } else { + nbyte += utf_ptr2len(p + nbyte); + } + nchar--; + } + } else { + nbyte = (int)nchar; + } + } + int len = 0; + if (argvars[2].v_type != VAR_UNKNOWN) { + int charlen = (int)tv_get_number(&argvars[2]); + while (charlen > 0 && nbyte + len < (int)slen) { + int off = nbyte + len; + + if (off < 0) { + len += 1; + } else { + if (skipcc) { + len += utfc_ptr2len(p + off); + } else { + len += utf_ptr2len(p + off); + } + } + charlen--; + } + } else { + len = (int)slen - nbyte; // default: all bytes that are 
available. + } + + // Only return the overlap between the specified part and the actual + // string. + if (nbyte < 0) { + len += nbyte; + nbyte = 0; + } else if ((size_t)nbyte > slen) { + nbyte = (int)slen; + } + if (len < 0) { + len = 0; + } else if (nbyte + len > (int)slen) { + len = (int)slen - nbyte; + } + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = xmemdupz(p + nbyte, (size_t)len); +} + +/// "strpart()" function +void f_strpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + bool error = false; + + const char *const p = tv_get_string(&argvars[0]); + const size_t slen = strlen(p); + + varnumber_T n = tv_get_number_chk(&argvars[1], &error); + varnumber_T len; + if (error) { + len = 0; + } else if (argvars[2].v_type != VAR_UNKNOWN) { + len = tv_get_number(&argvars[2]); + } else { + len = (varnumber_T)slen - n; // Default len: all bytes that are available. + } + + // Only return the overlap between the specified part and the actual + // string. + if (n < 0) { + len += n; + n = 0; + } else if (n > (varnumber_T)slen) { + n = (varnumber_T)slen; + } + if (len < 0) { + len = 0; + } else if (n + len > (varnumber_T)slen) { + len = (varnumber_T)slen - n; + } + + if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) { + int off; + + // length in characters + for (off = (int)n; off < (int)slen && len > 0; len--) { + off += utfc_ptr2len(p + off); + } + len = off - n; + } + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = xmemdupz(p + n, (size_t)len); +} + +/// "strridx()" function +void f_strridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + char buf[NUMBUFLEN]; + const char *const needle = tv_get_string_chk(&argvars[1]); + const char *const haystack = tv_get_string_buf_chk(&argvars[0], buf); + + rettv->vval.v_number = -1; + if (needle == NULL || haystack == NULL) { + return; // Type error; errmsg already given. 
+ } + + const size_t haystack_len = strlen(haystack); + ptrdiff_t end_idx; + if (argvars[2].v_type != VAR_UNKNOWN) { + // Third argument: upper limit for index. + end_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], NULL); + if (end_idx < 0) { + return; // Can never find a match. + } + } else { + end_idx = (ptrdiff_t)haystack_len; + } + + const char *lastmatch = NULL; + if (*needle == NUL) { + // Empty string matches past the end. + lastmatch = haystack + end_idx; + } else { + for (const char *rest = haystack; *rest != NUL; rest++) { + rest = strstr(rest, needle); + if (rest == NULL || rest > haystack + end_idx) { + break; + } + lastmatch = rest; + } + } + + if (lastmatch != NULL) { + rettv->vval.v_number = (varnumber_T)(lastmatch - haystack); + } +} + +/// "strtrans()" function +void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true); +} + +/// "utf16idx()" function +/// +/// Converts a byte or character offset in a string to the corresponding UTF-16 +/// code unit offset. 
+void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->vval.v_number = -1; + + if (tv_check_for_string_arg(argvars, 0) == FAIL + || tv_check_for_opt_number_arg(argvars, 1) == FAIL + || tv_check_for_opt_bool_arg(argvars, 2) == FAIL + || (argvars[2].v_type != VAR_UNKNOWN + && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) { + return; + } + + const char *const str = tv_get_string_chk(&argvars[0]); + varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); + if (str == NULL || idx < 0) { + return; + } + + varnumber_T countcc = false; + varnumber_T charidx = false; + if (argvars[2].v_type != VAR_UNKNOWN) { + countcc = tv_get_bool(&argvars[2]); + if (argvars[3].v_type != VAR_UNKNOWN) { + charidx = tv_get_bool(&argvars[3]); + } + } + + int (*ptr2len)(const char *); + if (countcc) { + ptr2len = utf_ptr2len; + } else { + ptr2len = utfc_ptr2len; + } + + const char *p; + int len; + int utf16idx = 0; + for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) { + if (*p == NUL) { + // If the index is exactly the number of bytes or characters in the + // string then return the length of the string in utf-16 code units. + if (charidx ? (idx == 0) : (p == (str + idx))) { + rettv->vval.v_number = len; + } + return; + } + utf16idx = len; + const int clen = ptr2len(p); + const int c = (clen > 1) ? 
utf_ptr2char(p) : *p; + if (c > 0xFFFF) { + len++; + } + p += ptr2len(p); + if (charidx) { + idx--; + } + } + + rettv->vval.v_number = utf16idx; +} + +/// "tolower(string)" function +void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), false); +} + +/// "toupper(string)" function +void f_toupper(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + rettv->v_type = VAR_STRING; + rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), true); +} + +/// "tr(string, fromstr, tostr)" function +void f_tr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + char buf[NUMBUFLEN]; + char buf2[NUMBUFLEN]; + + const char *in_str = tv_get_string(&argvars[0]); + const char *fromstr = tv_get_string_buf_chk(&argvars[1], buf); + const char *tostr = tv_get_string_buf_chk(&argvars[2], buf2); + + // Default return value: empty string. + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + if (fromstr == NULL || tostr == NULL) { + return; // Type error; errmsg already given. + } + garray_T ga; + ga_init(&ga, (int)sizeof(char), 80); + + // fromstr and tostr have to contain the same number of chars. + bool first = true; + while (*in_str != NUL) { + const char *cpstr = in_str; + const int inlen = utfc_ptr2len(in_str); + int cplen = inlen; + int idx = 0; + int fromlen; + for (const char *p = fromstr; *p != NUL; p += fromlen) { + fromlen = utfc_ptr2len(p); + if (fromlen == inlen && strncmp(in_str, p, (size_t)inlen) == 0) { + int tolen; + for (p = tostr; *p != NUL; p += tolen) { + tolen = utfc_ptr2len(p); + if (idx-- == 0) { + cplen = tolen; + cpstr = p; + break; + } + } + if (*p == NUL) { // tostr is shorter than fromstr. + goto error; + } + break; + } + idx++; + } + + if (first && cpstr == in_str) { + // Check that fromstr and tostr have the same number of + // (multi-byte) characters. 
Done only once when a character + // of in_str doesn't appear in fromstr. + first = false; + int tolen; + for (const char *p = tostr; *p != NUL; p += tolen) { + tolen = utfc_ptr2len(p); + idx--; + } + if (idx != 0) { + goto error; + } + } + + ga_grow(&ga, cplen); + memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); + ga.ga_len += cplen; + + in_str += inlen; + } + + // add a terminating NUL + ga_append(&ga, NUL); + + rettv->vval.v_string = ga.ga_data; + return; +error: + semsg(_(e_invarg2), fromstr); + ga_clear(&ga); +} + +/// "trim({expr})" function +void f_trim(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + char buf1[NUMBUFLEN]; + char buf2[NUMBUFLEN]; + const char *head = tv_get_string_buf_chk(&argvars[0], buf1); + const char *mask = NULL; + const char *prev; + const char *p; + int dir = 0; + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + if (head == NULL) { + return; + } + + if (tv_check_for_opt_string_arg(argvars, 1) == FAIL) { + return; + } + + if (argvars[1].v_type == VAR_STRING) { + mask = tv_get_string_buf_chk(&argvars[1], buf2); + if (*mask == NUL) { + mask = NULL; + } + + if (argvars[2].v_type != VAR_UNKNOWN) { + bool error = false; + // leading or trailing characters to trim + dir = (int)tv_get_number_chk(&argvars[2], &error); + if (error) { + return; + } + if (dir < 0 || dir > 2) { + semsg(_(e_invarg2), tv_get_string(&argvars[2])); + return; + } + } + } + + if (dir == 0 || dir == 1) { + // Trim leading characters + while (*head != NUL) { + int c1 = utf_ptr2char(head); + if (mask == NULL) { + if (c1 > ' ' && c1 != 0xa0) { + break; + } + } else { + for (p = mask; *p != NUL; MB_PTR_ADV(p)) { + if (c1 == utf_ptr2char(p)) { + break; + } + } + if (*p == NUL) { + break; + } + } + MB_PTR_ADV(head); + } + } + + const char *tail = head + strlen(head); + if (dir == 0 || dir == 2) { + // Trim trailing characters + for (; tail > head; tail = prev) { + prev = tail; + MB_PTR_BACK(head, prev); + int c1 = utf_ptr2char(prev); + if 
(mask == NULL) { + if (c1 > ' ' && c1 != 0xa0) { + break; + } + } else { + for (p = mask; *p != NUL; MB_PTR_ADV(p)) { + if (c1 == utf_ptr2char(p)) { + break; + } + } + if (*p == NUL) { + break; + } + } + } + } + rettv->vval.v_string = xstrnsave(head, (size_t)(tail - head)); +} |