aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/strings.c
diff options
context:
space:
mode:
authorJosh Rahm <joshuarahm@gmail.com>2023-11-29 22:39:54 +0000
committerJosh Rahm <joshuarahm@gmail.com>2023-11-29 22:39:54 +0000
commit21cb7d04c387e4198ca8098a884c78b56ffcf4c2 (patch)
tree84fe5690df1551f0bb2bdfe1a13aacd29ebc1de7 /src/nvim/strings.c
parentd9c904f85a23a496df4eb6be42aa43f007b22d50 (diff)
parent4a8bf24ac690004aedf5540fa440e788459e5e34 (diff)
downloadrneovim-colorcolchar.tar.gz
rneovim-colorcolchar.tar.bz2
rneovim-colorcolchar.zip
Merge remote-tracking branch 'upstream/master' into colorcolcharcolorcolchar
Diffstat (limited to 'src/nvim/strings.c')
-rw-r--r--src/nvim/strings.c1665
1 files changed, 1573 insertions, 92 deletions
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index 34b3c38103..a439d11818 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -1,6 +1,3 @@
-// This is an open source non-commercial project. Dear PVS-Studio, please check
-// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
-
#include <assert.h>
#include <inttypes.h>
#include <math.h>
@@ -12,23 +9,56 @@
#include <string.h>
#include "auto/config.h"
-#include "nvim/ascii.h"
-#include "nvim/assert.h"
+#include "nvim/ascii_defs.h"
+#include "nvim/assert_defs.h"
#include "nvim/charset.h"
#include "nvim/eval/encode.h"
#include "nvim/eval/typval.h"
#include "nvim/eval/typval_defs.h"
#include "nvim/ex_docmd.h"
+#include "nvim/garray.h"
#include "nvim/gettext.h"
-#include "nvim/macros.h"
+#include "nvim/globals.h"
+#include "nvim/macros_defs.h"
#include "nvim/math.h"
#include "nvim/mbyte.h"
#include "nvim/memory.h"
#include "nvim/message.h"
#include "nvim/option.h"
+#include "nvim/plines.h"
#include "nvim/strings.h"
-#include "nvim/types.h"
-#include "nvim/vim.h"
+#include "nvim/types_defs.h"
+#include "nvim/vim_defs.h"
+
+// Error messages for printf()-style format strings that use positional ("%N$") arguments.
+// Every message except the internal error E1507 is marked for translation with N_().
+static const char e_cannot_mix_positional_and_non_positional_str[]
+  = N_("E1500: Cannot mix positional and non-positional arguments: %s");
+static const char e_fmt_arg_nr_unused_str[]
+  = N_("E1501: format argument %d unused in $-style format: %s");
+static const char e_positional_num_field_spec_reused_str_str[]
+  = N_("E1502: Positional argument %d used as field width reused as different type: %s/%s");
+static const char e_positional_nr_out_of_bounds_str[]
+  = N_("E1503: Positional argument %d out of bounds: %s");
+static const char e_positional_arg_num_type_inconsistent_str_str[]
+  = N_("E1504: Positional argument %d type used inconsistently: %s/%s");
+static const char e_invalid_format_specifier_str[]
+  = N_("E1505: Invalid format specifier: %s");
+// Takes the argument index (%d) first and the format string (%s) second.
+static const char e_aptypes_is_null_nr_str[]
+  = "E1507: Internal error: ap_types or ap_types[idx] is NULL: %d: %s";
+
+// Human-readable names of the TYPE_* argument classes; format_typename() passes them through
+// _() when building the E1502/E1504 error messages.
+static const char typename_unknown[] = N_("unknown");
+static const char typename_int[] = N_("int");
+static const char typename_longint[] = N_("long int");
+static const char typename_longlongint[] = N_("long long int");
+static const char typename_signedsizet[] = N_("signed size_t");
+static const char typename_unsignedint[] = N_("unsigned int");
+static const char typename_unsignedlongint[] = N_("unsigned long int");
+static const char typename_unsignedlonglongint[] = N_("unsigned long long int");
+static const char typename_sizet[] = N_("size_t");
+static const char typename_pointer[] = N_("pointer");
+static const char typename_percent[] = N_("percent");
+static const char typename_char[] = N_("char");
+static const char typename_string[] = N_("string");
+static const char typename_float[] = N_("float");
/// Copy up to `len` bytes of `string` into newly allocated memory and
/// terminate with a NUL. The allocated memory always has size `len + 1`, even
@@ -350,18 +380,6 @@ void del_trailing_spaces(char *ptr)
}
}
-#if !defined(HAVE_STRNLEN)
-size_t xstrnlen(const char *s, size_t n)
- FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
-{
- const char *end = memchr(s, '\0', n);
- if (end == NULL) {
- return n;
- }
- return (size_t)(end - s);
-}
-#endif
-
#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP))
// Compare two strings, ignoring case, using current locale.
// Doesn't work for multi-byte characters.
@@ -371,7 +389,7 @@ int vim_stricmp(const char *s1, const char *s2)
{
int i;
- for (;;) {
+ while (true) {
i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2));
if (i != 0) {
return i; // this character different
@@ -411,6 +429,13 @@ int vim_strnicmp(const char *s1, const char *s2, size_t len)
}
#endif
+/// Case-insensitive `strequal`.
+///
+/// Two NULL pointers compare equal; a NULL pointer never equals a non-NULL string.
+///
+/// @param[in]  a  First string, may be NULL.
+/// @param[in]  b  Second string, may be NULL.
+///
+/// @return true when both are NULL or both are non-NULL and STRICMP() reports equality.
+bool striequal(const char *a, const char *b)
+  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  if (a == NULL || b == NULL) {
+    // Equal only when both are missing.
+    return a == b;
+  }
+  return STRICMP(a, b) == 0;
+}
+
/// strchr() version which handles multibyte strings
///
/// @param[in] string String to search in.
@@ -455,10 +480,8 @@ void sort_strings(char **files, int count)
bool has_non_ascii(const char *s)
FUNC_ATTR_PURE
{
- const char *p;
-
if (s != NULL) {
- for (p = s; *p != NUL; p++) {
+ for (const char *p = s; *p != NUL; p++) {
if ((uint8_t)(*p) >= 128) {
return true;
}
@@ -498,13 +521,10 @@ static const char *const e_printf =
/// Get number argument from idxp entry in tvs
///
-/// Will give an error message for VimL entry with invalid type or for
-/// insufficient entries.
+/// Will give an error message for Vimscript entry with invalid type or for insufficient entries.
///
-/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN
-/// value.
-/// @param[in,out] idxp Index in a list. Will be incremented. Indexing starts
-/// at 1.
+/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value.
+/// @param[in,out] idxp Index in a list. Will be incremented. Indexing starts at 1.
///
/// @return Number value or 0 in case of error.
static varnumber_T tv_nr(typval_T *tvs, int *idxp)
@@ -528,10 +548,10 @@ static varnumber_T tv_nr(typval_T *tvs, int *idxp)
/// Get string argument from idxp entry in tvs
///
-/// Will give an error message for VimL entry with invalid type or for
+/// Will give an error message for Vimscript entry with invalid type or for
/// insufficient entries.
///
-/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN
+/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN
/// value.
/// @param[in,out] idxp Index in a list. Will be incremented.
/// @param[out] tofree If the idxp entry in tvs is not a String or a Number,
@@ -562,7 +582,7 @@ static const char *tv_str(typval_T *tvs, int *idxp, char **const tofree)
/// Get pointer argument from the next entry in tvs
///
-/// Will give an error message for VimL entry with invalid type or for
+/// Will give an error message for Vimscript entry with invalid type or for
/// insufficient entries.
///
/// @param[in] tvs List of typval_T values.
@@ -573,7 +593,7 @@ static const void *tv_ptr(const typval_T *const tvs, int *const idxp)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{
#define OFF(attr) offsetof(union typval_vval_union, attr)
- STATIC_ASSERT(OFF(v_string) == OFF(v_list) // -V568
+ STATIC_ASSERT(OFF(v_string) == OFF(v_list)
&& OFF(v_string) == OFF(v_dict)
&& OFF(v_string) == OFF(v_partial)
&& sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_list)
@@ -593,11 +613,10 @@ static const void *tv_ptr(const typval_T *const tvs, int *const idxp)
/// Get float argument from idxp entry in tvs
///
-/// Will give an error message for VimL entry with invalid type or for
+/// Will give an error message for Vimscript entry with invalid type or for
/// insufficient entries.
///
-/// @param[in] tvs List of VimL values. List is terminated by VAR_UNKNOWN
-/// value.
+/// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value.
/// @param[in,out] idxp Index in a list. Will be incremented.
///
/// @return Floating-point value or zero in case of error.
@@ -719,23 +738,608 @@ int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap)
return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
}
+// Argument classes of a printf()-style conversion, as returned by format_typeof().
+// skip_to_arg() uses them to pick the C type to read from a va_list.
+enum {
+  TYPE_UNKNOWN = -1,  // conversion character not recognized
+  TYPE_INT,
+  TYPE_LONGINT,
+  TYPE_LONGLONGINT,
+  TYPE_SIGNEDSIZET,  // "%zd"; read as ptrdiff_t by skip_to_arg()
+  TYPE_UNSIGNEDINT,
+  TYPE_UNSIGNEDLONGINT,
+  TYPE_UNSIGNEDLONGLONGINT,
+  TYPE_SIZET,
+  TYPE_POINTER,
+  TYPE_PERCENT,  // "%%"; consumes no argument
+  TYPE_CHAR,
+  TYPE_STRING,
+  TYPE_FLOAT,  // read as double by skip_to_arg()
+};
+
+/// Map a conversion specification to the class of argument it consumes.
+///
+/// @param[in]  type  Points at an optional length modifier ("h", "l", "ll" or "z") followed
+///                   by the conversion character. A '*' (width or precision taken from an
+///                   argument) is classified like "%hd", i.e. as an int.
+///
+/// @return One of the TYPE_* constants; TYPE_UNKNOWN when the conversion character is not
+///         recognized.
+static int format_typeof(const char *type)
+  FUNC_ATTR_NONNULL_ALL
+{
+  const char *s = type;
+
+  // Length modifier: '\0' (none), 'h', 'l', 'z', or 'L' which stands for "ll".
+  char modifier = '\0';
+  if (*s == 'h' || *s == 'l' || *s == 'z') {
+    modifier = *s++;
+    if (modifier == 'l' && *s == 'l') {
+      modifier = 'L';
+      s++;
+    }
+  }
+
+  // Conversion character, with the synonyms folded onto their canonical form.
+  char conv = *s;
+  if (conv == 'i') {
+    conv = 'd';
+  } else if (conv == '*') {
+    conv = 'd';
+    modifier = 'h';
+  } else if (conv == 'D') {
+    conv = 'd';
+    modifier = 'l';
+  } else if (conv == 'U') {
+    conv = 'u';
+    modifier = 'l';
+  } else if (conv == 'O') {
+    conv = 'o';
+    modifier = 'l';
+  }
+
+  // Conversions whose class does not depend on the length modifier return right away; the
+  // integer conversions only note their signedness here.
+  bool is_signed = false;
+  switch (conv) {
+  case '%':
+    return TYPE_PERCENT;
+  case 'c':
+    return TYPE_CHAR;
+  case 's':
+  case 'S':
+    return TYPE_STRING;
+  case 'p':
+    return TYPE_POINTER;
+  case 'b':
+  case 'B':
+    // unsigned long long regardless of the length modifier
+    return TYPE_UNSIGNEDLONGLONGINT;
+  case 'f':
+  case 'F':
+  case 'e':
+  case 'E':
+  case 'g':
+  case 'G':
+    return TYPE_FLOAT;
+  case 'd':
+    is_signed = true;
+    break;
+  case 'u':
+  case 'o':
+  case 'x':
+  case 'X':
+    break;
+  default:
+    return TYPE_UNKNOWN;
+  }
+
+  switch (modifier) {
+  case 'l':
+    return is_signed ? TYPE_LONGINT : TYPE_UNSIGNEDLONGINT;
+  case 'L':
+    return is_signed ? TYPE_LONGLONGINT : TYPE_UNSIGNEDLONGLONGINT;
+  case 'z':
+    return is_signed ? TYPE_SIGNEDSIZET : TYPE_SIZET;
+  default:
+    // No modifier or 'h': char and short arguments are passed as int.
+    return is_signed ? TYPE_INT : TYPE_UNSIGNEDINT;
+  }
+}
+
+/// Get the translated, human-readable name of the argument class of a conversion.
+///
+/// @param[in]  type  Conversion specification, interpreted by format_typeof().
+///
+/// @return The name passed through _(); the name for "unknown" when format_typeof() does
+///         not recognize the conversion.
+static char *format_typename(const char *type)
+  FUNC_ATTR_NONNULL_ALL
+{
+  // Untranslated names indexed by TYPE_* constant.
+  static const char *const names[] = {
+    [TYPE_INT] = typename_int,
+    [TYPE_LONGINT] = typename_longint,
+    [TYPE_LONGLONGINT] = typename_longlongint,
+    [TYPE_SIGNEDSIZET] = typename_signedsizet,
+    [TYPE_UNSIGNEDINT] = typename_unsignedint,
+    [TYPE_UNSIGNEDLONGINT] = typename_unsignedlongint,
+    [TYPE_UNSIGNEDLONGLONGINT] = typename_unsignedlonglongint,
+    [TYPE_SIZET] = typename_sizet,
+    [TYPE_POINTER] = typename_pointer,
+    [TYPE_PERCENT] = typename_percent,
+    [TYPE_CHAR] = typename_char,
+    [TYPE_STRING] = typename_string,
+    [TYPE_FLOAT] = typename_float,
+  };
+
+  const int kind = format_typeof(type);
+
+  // TYPE_UNKNOWN is negative and therefore has no slot in the table.
+  if (kind < 0 || (size_t)kind >= sizeof(names) / sizeof(names[0]) || names[kind] == NULL) {
+    return _(typename_unknown);
+  }
+
+  return _(names[kind]);
+}
+
+/// Record which conversion consumes positional argument `arg`, growing the table as needed,
+/// and check that this use agrees with any earlier use of the same argument.
+///
+/// @param[in,out]  ap_types    Table indexed by `arg - 1`; each used entry points into the
+///                             format string at the conversion (or at the '*') that consumes
+///                             the argument. Allocated or reallocated here when too small.
+/// @param[in]      arg         1-based positional argument number.
+/// @param[in,out]  num_posarg  Number of entries in `*ap_types`.
+/// @param[in]      type        Conversion specification (or '*') using the argument.
+///
+/// @return OK, or FAIL after giving an error message when `arg` is not a valid argument
+///         number or the argument is used with conflicting types.
+static int adjust_types(const char ***ap_types, int arg, int *num_posarg, const char *type)
+  FUNC_ATTR_NONNULL_ALL
+{
+  if (arg < 1) {
+    // Positional arguments are numbered from 1. Zero ("%*0$d") or a number that wrapped
+    // around to a negative value would index ap_types[arg - 1] out of bounds below.
+    semsg(_(e_positional_nr_out_of_bounds_str), arg, type);
+    return FAIL;
+  }
+
+  if (*ap_types == NULL || *num_posarg < arg) {
+    const char **new_types = *ap_types == NULL
+                             ? xcalloc((size_t)arg, sizeof(const char *))
+                             : xrealloc(*ap_types, (size_t)arg * sizeof(const char *));
+
+    // Mark the newly added entries as not used yet.
+    for (int idx = *num_posarg; idx < arg; idx++) {
+      new_types[idx] = NULL;
+    }
+
+    *ap_types = new_types;
+    *num_posarg = arg;
+  }
+
+  if ((*ap_types)[arg - 1] != NULL) {
+    if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*') {
+      // One of the two uses is a field width or precision; look at the other one.
+      const char *pt = type;
+      if (pt[0] == '*') {
+        pt = (*ap_types)[arg - 1];
+      }
+
+      if (pt[0] != '*') {
+        // A width/precision argument may only be reused with "%d" or "%i".
+        switch (pt[0]) {
+        case 'd':
+        case 'i':
+          break;
+        default:
+          semsg(_(e_positional_num_field_spec_reused_str_str), arg,
+                format_typename((*ap_types)[arg - 1]), format_typename(type));
+          return FAIL;
+        }
+      }
+    } else {
+      if (format_typeof(type) != format_typeof((*ap_types)[arg - 1])) {
+        semsg(_(e_positional_arg_num_type_inconsistent_str_str), arg,
+              format_typename(type), format_typename((*ap_types)[arg - 1]));
+        return FAIL;
+      }
+    }
+  }
+
+  (*ap_types)[arg - 1] = type;
+
+  return OK;
+}
+
+/// Scan a printf()-style format and build the table of positional ("%N$") argument types.
+///
+/// Gives an error message and fails when the format is malformed, mixes positional and
+/// non-positional arguments, uses a positional argument inconsistently, leaves a positional
+/// argument unused, or (with `tvs`) refers to an argument that was not passed.
+///
+/// @param[out]  ap_types    Set to an allocated table with one entry per positional
+///                          argument, see adjust_types(). Left untouched when the format
+///                          uses no positional arguments; freed and set to NULL on failure.
+/// @param[out]  num_posarg  Number of entries in `*ap_types`; reset to 0 on failure.
+/// @param[in]   fmt         Format string, may be NULL (nothing to do).
+/// @param[in]   tvs         Vimscript arguments terminated by a VAR_UNKNOWN entry, or NULL.
+///
+/// @return OK or FAIL.
+static int parse_fmt_types(const char ***ap_types, int *num_posarg, const char *fmt, typval_T *tvs)
+  FUNC_ATTR_NONNULL_ARG(1, 2)
+{
+  const char *p = fmt;
+  const char *arg = NULL;
+
+  int any_pos = 0;
+  int any_arg = 0;
+
+#define CHECK_POS_ARG \
+  do { \
+    if (any_pos && any_arg) { \
+      semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); \
+      goto error; \
+    } \
+  } while (0);
+
+  if (p == NULL) {
+    return OK;
+  }
+
+  while (*p != NUL) {
+    if (*p != '%') {
+      // Literal text: jump to the next '%' or to the end of the format.
+      const char *q = strchr(p + 1, '%');
+      size_t n = (q == NULL) ? strlen(p) : (size_t)(q - p);
+
+      p += n;
+    } else {
+      // variable for positional arg
+      int pos_arg = -1;
+
+      p++;  // skip '%'
+
+      // First check to see if we find a positional
+      // argument specifier
+      const char *ptype = p;
+
+      while (ascii_isdigit(*ptype)) {
+        ptype++;
+      }
+
+      if (*ptype == '$') {
+        if (ptype == p || *p == '0') {
+          // '$' without a number in front of it, or 0 flag at the wrong place
+          semsg(_(e_invalid_format_specifier_str), fmt);
+          goto error;
+        }
+
+        // Positional argument
+        unsigned uj = (unsigned)(*p++ - '0');
+
+        while (ascii_isdigit((int)(*p))) {
+          uj = 10 * uj + (unsigned)(*p++ - '0');
+        }
+        pos_arg = (int)uj;
+
+        if (pos_arg < 1) {
+          // The number was too big and wrapped around. Without this check -1 would be
+          // mistaken for "not positional".
+          semsg(_(e_invalid_format_specifier_str), fmt);
+          goto error;
+        }
+
+        any_pos = 1;
+        CHECK_POS_ARG;
+
+        p++;
+      }
+
+      // Skip the flags, they have no influence on the argument types.
+      while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
+             || *p == '#' || *p == '\'') {
+        p++;
+      }
+
+      // parse field width
+      if (*(arg = p) == '*') {
+        p++;
+
+        if (ascii_isdigit((int)(*p))) {
+          // Positional argument field width
+          unsigned uj = (unsigned)(*p++ - '0');
+
+          while (ascii_isdigit((int)(*p))) {
+            uj = 10 * uj + (unsigned)(*p++ - '0');
+          }
+
+          // The number must be followed by '$' and be a valid argument number (>= 1).
+          if (*p != '$' || (int)uj < 1) {
+            semsg(_(e_invalid_format_specifier_str), fmt);
+            goto error;
+          }
+
+          p++;
+          any_pos = 1;
+          CHECK_POS_ARG;
+
+          if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) {
+            goto error;
+          }
+        } else {
+          any_arg = 1;
+          CHECK_POS_ARG;
+        }
+      } else if (ascii_isdigit((int)(*p))) {
+        // Literal field width: only the digits need to be skipped here.
+        do {
+          p++;
+        } while (ascii_isdigit((int)(*p)));
+
+        if (*p == '$') {
+          semsg(_(e_invalid_format_specifier_str), fmt);
+          goto error;
+        }
+      }
+
+      // parse precision
+      if (*p == '.') {
+        p++;
+
+        if (*(arg = p) == '*') {
+          p++;
+
+          if (ascii_isdigit((int)(*p))) {
+            // Positional argument precision
+            unsigned uj = (unsigned)(*p++ - '0');
+
+            while (ascii_isdigit((int)(*p))) {
+              uj = 10 * uj + (unsigned)(*p++ - '0');
+            }
+
+            // The number must be followed by '$' and be a valid argument number (>= 1).
+            if (*p != '$' || (int)uj < 1) {
+              semsg(_(e_invalid_format_specifier_str), fmt);
+              goto error;
+            }
+
+            any_pos = 1;
+            CHECK_POS_ARG;
+
+            p++;
+
+            if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) {
+              goto error;
+            }
+          } else {
+            any_arg = 1;
+            CHECK_POS_ARG;
+          }
+        } else if (ascii_isdigit((int)(*p))) {
+          // Literal precision: only the digits need to be skipped here.
+          do {
+            p++;
+          } while (ascii_isdigit((int)(*p)));
+
+          if (*p == '$') {
+            semsg(_(e_invalid_format_specifier_str), fmt);
+            goto error;
+          }
+        }
+      }
+
+      if (pos_arg != -1) {
+        any_pos = 1;
+        CHECK_POS_ARG;
+
+        // The type of the positional argument starts at the length modifier.
+        ptype = p;
+      }
+
+      // skip 'h', 'l', 'll' and 'z' length modifiers
+      if (*p == 'h' || *p == 'l' || *p == 'z') {
+        const char modifier = *p++;
+        if (modifier == 'l' && *p == 'l') {
+          // double l = long long
+          p++;
+        }
+      }
+
+      switch (*p) {
+      // Check for known format specifiers. % is special!
+      case 'i':
+      case '*':
+      case 'd':
+      case 'u':
+      case 'o':
+      case 'D':
+      case 'U':
+      case 'O':
+      case 'x':
+      case 'X':
+      case 'b':
+      case 'B':
+      case 'c':
+      case 's':
+      case 'S':
+      case 'p':
+      case 'f':
+      case 'F':
+      case 'e':
+      case 'E':
+      case 'g':
+      case 'G':
+        if (pos_arg != -1) {
+          if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL) {
+            goto error;
+          }
+        } else {
+          any_arg = 1;
+          CHECK_POS_ARG;
+        }
+        break;
+
+      default:
+        if (pos_arg != -1) {
+          semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt);
+          goto error;
+        }
+      }
+
+      if (*p != NUL) {
+        p++;  // step over the just processed conversion specifier
+      }
+    }
+  }
+
+  // Every positional argument up to the highest one must be used by the format and, when
+  // formatting Vimscript values, must actually have been passed.
+  for (int arg_idx = 0; arg_idx < *num_posarg; arg_idx++) {
+    if ((*ap_types)[arg_idx] == NULL) {
+      semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
+      goto error;
+    }
+
+    if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN) {
+      semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
+      goto error;
+    }
+  }
+
+  return OK;
+
+error:
+  xfree(*ap_types);
+  *ap_types = NULL;
+  *num_posarg = 0;
+  return FAIL;
+}
+
+/// Position a va_list so that the next va_arg() call reads positional argument `*arg_idx`.
+///
+/// When the wanted argument is the next one in sequence nothing is read. Otherwise the
+/// arguments in between are read and discarded, using the types recorded in `ap_types`;
+/// when the wanted argument was already passed the list is first restarted from `ap_start`.
+///
+/// @param[in]      ap_types  Per-argument conversion pointers from parse_fmt_types().
+/// @param[in]      ap_start  Argument list positioned at the first argument.
+/// @param[in,out]  ap        Working argument list.
+/// @param[in,out]  arg_idx   1-based number of the wanted argument; incremented on success.
+/// @param[in,out]  arg_cur   Number of arguments already consumed from `ap`; on success it
+///                           also counts the argument the caller is about to read.
+/// @param[in]      fmt       Format string, only used in the internal error message.
+static void skip_to_arg(const char **ap_types, va_list ap_start, va_list *ap, int *arg_idx,
+                        int *arg_cur, const char *fmt)
+  FUNC_ATTR_NONNULL_ARG(3, 4, 5)
+{
+  int arg_min = 0;
+
+  if (*arg_cur + 1 == *arg_idx) {
+    // The wanted argument is the next one, no skipping needed.
+    (*arg_cur)++;
+    (*arg_idx)++;
+    return;
+  }
+
+  if (*arg_cur >= *arg_idx) {
+    // Reset ap to ap_start and skip arg_idx - 1 types
+    va_end(*ap);
+    va_copy(*ap, ap_start);
+  } else {
+    // Skip over any we should skip
+    arg_min = *arg_cur;
+  }
+
+  for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; (*arg_cur)++) {
+    if (ap_types == NULL || ap_types[*arg_cur] == NULL) {
+      // The message has "%d: %s": the argument index comes first, then the format.
+      siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
+      return;
+    }
+
+    const char *p = ap_types[*arg_cur];
+
+    int fmt_type = format_typeof(p);
+
+    // Read the argument with the type it was passed as and discard the value.
+    switch (fmt_type) {
+    case TYPE_PERCENT:
+    case TYPE_UNKNOWN:
+      break;
+
+    case TYPE_CHAR:
+      va_arg(*ap, int);
+      break;
+
+    case TYPE_STRING:
+      va_arg(*ap, const char *);
+      break;
+
+    case TYPE_POINTER:
+      va_arg(*ap, void *);
+      break;
+
+    case TYPE_INT:
+      va_arg(*ap, int);
+      break;
+
+    case TYPE_LONGINT:
+      va_arg(*ap, long);
+      break;
+
+    case TYPE_LONGLONGINT:
+      va_arg(*ap, long long);  // NOLINT(runtime/int)
+      break;
+
+    case TYPE_SIGNEDSIZET:  // implementation-defined, usually ptrdiff_t
+      va_arg(*ap, ptrdiff_t);
+      break;
+
+    case TYPE_UNSIGNEDINT:
+      va_arg(*ap, unsigned);
+      break;
+
+    case TYPE_UNSIGNEDLONGINT:
+      va_arg(*ap, unsigned long);
+      break;
+
+    case TYPE_UNSIGNEDLONGLONGINT:
+      va_arg(*ap, unsigned long long);  // NOLINT(runtime/int)
+      break;
+
+    case TYPE_SIZET:
+      va_arg(*ap, size_t);
+      break;
+
+    case TYPE_FLOAT:
+      va_arg(*ap, double);
+      break;
+    }
+  }
+
+  // Because we know that after we return from this call,
+  // a va_arg() call is made, we can pre-emptively
+  // increment the current argument index.
+  (*arg_cur)++;
+  (*arg_idx)++;
+}
+
/// Write formatted value to the string
///
/// @param[out] str String to write to.
/// @param[in] str_m String length.
/// @param[in] fmt String format.
/// @param[in] ap Values that should be formatted. Ignored if tvs is not NULL.
-/// @param[in] tvs Values that should be formatted, for printf() VimL
+/// @param[in] tvs Values that should be formatted, for printf() Vimscript
/// function. Must be NULL in other cases.
///
/// @return Number of bytes excluding NUL byte that would be written to the
/// string if str_m was greater or equal to the return value.
-int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, typval_T *const tvs)
+int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap_start,
+ typval_T *const tvs)
{
size_t str_l = 0;
bool str_avail = str_l < str_m;
const char *p = fmt;
+ int arg_cur = 0;
+ int num_posarg = 0;
int arg_idx = 1;
+ va_list ap;
+ const char **ap_types = NULL;
+
+ if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL) {
+ return 0;
+ }
+
+ va_copy(ap, ap_start);
if (!p) {
p = "";
@@ -791,8 +1395,31 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// buffer for 's' and 'S' specs
char *tofree = NULL;
+ // variable for positional arg
+ int pos_arg = -1;
+
p++; // skip '%'
+ // First check to see if we find a positional
+ // argument specifier
+ const char *ptype = p;
+
+ while (ascii_isdigit(*ptype)) {
+ ptype++;
+ }
+
+ if (*ptype == '$') {
+ // Positional argument
+ unsigned uj = (unsigned)(*p++ - '0');
+
+ while (ascii_isdigit((int)(*p))) {
+ uj = 10 * uj + (unsigned)(*p++ - '0');
+ }
+ pos_arg = (int)uj;
+
+ p++;
+ }
+
// parse flags
while (true) {
switch (*p) {
@@ -819,7 +1446,25 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// parse field width
if (*p == '*') {
p++;
- const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int);
+
+ if (ascii_isdigit((int)(*p))) {
+ // Positional argument field width
+ unsigned uj = (unsigned)(*p++ - '0');
+
+ while (ascii_isdigit((int)(*p))) {
+ uj = 10 * uj + (unsigned)(*p++ - '0');
+ }
+ arg_idx = (int)uj;
+
+ p++;
+ }
+
+ const int j = (tvs
+ ? (int)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, int)));
+
if (j >= 0) {
min_field_width = (size_t)j;
} else {
@@ -829,10 +1474,10 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
} else if (ascii_isdigit((int)(*p))) {
// size_t could be wider than unsigned int; make sure we treat
// argument like common implementations do
- unsigned int uj = (unsigned)(*p++ - '0');
+ unsigned uj = (unsigned)(*p++ - '0');
while (ascii_isdigit((int)(*p))) {
- uj = 10 * uj + (unsigned int)(*p++ - '0');
+ uj = 10 * uj + (unsigned)(*p++ - '0');
}
min_field_width = uj;
}
@@ -841,24 +1486,43 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
if (*p == '.') {
p++;
precision_specified = 1;
- if (*p == '*') {
- const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int);
+
+ if (ascii_isdigit((int)(*p))) {
+ // size_t could be wider than unsigned int; make sure we
+ // treat argument like common implementations do
+ unsigned uj = (unsigned)(*p++ - '0');
+
+ while (ascii_isdigit((int)(*p))) {
+ uj = 10 * uj + (unsigned)(*p++ - '0');
+ }
+ precision = uj;
+ } else if (*p == '*') {
p++;
+
+ if (ascii_isdigit((int)(*p))) {
+ // positional argument
+ unsigned uj = (unsigned)(*p++ - '0');
+
+ while (ascii_isdigit((int)(*p))) {
+ uj = 10 * uj + (unsigned)(*p++ - '0');
+ }
+ arg_idx = (int)uj;
+
+ p++;
+ }
+
+ const int j = (tvs
+ ? (int)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, int)));
+
if (j >= 0) {
precision = (size_t)j;
} else {
precision_specified = 0;
precision = 0;
}
- } else if (ascii_isdigit((int)(*p))) {
- // size_t could be wider than unsigned int; make sure we
- // treat argument like common implementations do
- unsigned int uj = (unsigned)(*p++ - '0');
-
- while (ascii_isdigit((int)(*p))) {
- uj = 10 * uj + (unsigned int)(*p++ - '0');
- }
- precision = uj;
}
}
@@ -866,8 +1530,9 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
if (*p == 'h' || *p == 'l' || *p == 'z') {
length_modifier = *p;
p++;
- if (length_modifier == 'l' && *p == 'l') { // ll, encoded as 2
- length_modifier = '2';
+ if (length_modifier == 'l' && *p == 'l') {
+ // double l = long long
+ length_modifier = 'L';
p++;
}
}
@@ -897,10 +1562,14 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
case 'x':
case 'X':
if (tvs && length_modifier == '\0') {
- length_modifier = '2';
+ length_modifier = 'L';
}
}
+ if (pos_arg != -1) {
+ arg_idx = pos_arg;
+ }
+
// get parameter value, do initial processing
switch (fmt_spec) {
// '%' and 'c' behave similar to 's' regarding flags and field widths
@@ -915,7 +1584,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
break;
case 'c': {
- const int j = tvs ? (int)tv_nr(tvs, &arg_idx) : va_arg(ap, int);
+ const int j = (tvs
+ ? (int)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, int)));
+
// standard demands unsigned char
uchar_arg = (unsigned char)j;
str_arg = (char *)&uchar_arg;
@@ -924,8 +1598,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
case 's':
case 'S':
- str_arg = tvs ? tv_str(tvs, &arg_idx, &tofree)
- : va_arg(ap, const char *);
+ str_arg = (tvs
+ ? tv_str(tvs, &arg_idx, &tofree)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, const char *)));
+
if (!str_arg) {
str_arg = "[NULL]";
str_arg_l = 6;
@@ -946,10 +1624,10 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
- str_arg);
}
if (fmt_spec == 'S') {
- char *p1;
+ const char *p1;
size_t i;
- for (i = 0, p1 = (char *)str_arg; *p1; p1 += utfc_ptr2len(p1)) {
+ for (i = 0, p1 = str_arg; *p1; p1 += utfc_ptr2len(p1)) {
size_t cell = (size_t)utf_ptr2cells(p1);
if (precision_specified && i + cell > precision) {
break;
@@ -992,7 +1670,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
const void *ptr_arg = NULL;
if (fmt_spec == 'p') {
- ptr_arg = tvs ? tv_ptr(tvs, &arg_idx) : va_arg(ap, void *);
+ ptr_arg = (tvs
+ ? tv_ptr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, void *)));
+
if (ptr_arg) {
arg_sign = 1;
}
@@ -1000,23 +1683,41 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// signed
switch (length_modifier) {
case '\0':
- arg = (int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, int));
+ arg = (tvs
+ ? (int)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, int)));
break;
case 'h':
// char and short arguments are passed as int16_t
- arg = (int16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, int));
+ arg = (int16_t)
+ (tvs
+ ? (int)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, int)));
break;
case 'l':
- arg = (tvs ? (long)tv_nr(tvs, &arg_idx) : va_arg(ap, long));
+ arg = (tvs
+ ? (long)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, long)));
break;
- case '2':
- arg = (
- tvs
- ? (long long)tv_nr(tvs, &arg_idx) // NOLINT (runtime/int)
- : va_arg(ap, long long)); // NOLINT (runtime/int)
+ case 'L':
+ arg = (tvs
+ ? (long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, long long))); // NOLINT(runtime/int)
break;
- case 'z':
- arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t));
+ case 'z': // implementation-defined, usually ptrdiff_t
+ arg = (tvs
+ ? (ptrdiff_t)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, ptrdiff_t)));
break;
}
if (arg > 0) {
@@ -1028,23 +1729,40 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
// unsigned
switch (length_modifier) {
case '\0':
- uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
+ uarg = (tvs
+ ? (unsigned)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, unsigned)));
break;
case 'h':
- uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int));
+ uarg = (uint16_t)
+ (tvs
+ ? (unsigned)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, unsigned)));
break;
case 'l':
- uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long));
+ uarg = (tvs
+ ? (unsigned long)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, unsigned long)));
break;
- case '2':
- uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int)
- tvs
- ? ((unsigned long long) // NOLINT (runtime/int)
- tv_nr(tvs, &arg_idx))
- : va_arg(ap, unsigned long long)); // NOLINT (runtime/int)
+ case 'L':
+ uarg = (tvs
+ ? (unsigned long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, unsigned long long))); // NOLINT(runtime/int)
break;
case 'z':
- uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t));
+ uarg = (tvs
+ ? (size_t)tv_nr(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, size_t)));
break;
}
arg_sign = (uarg != 0);
@@ -1179,7 +1897,12 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
char format[40];
int remove_trailing_zeroes = false;
- double f = tvs ? tv_float(tvs, &arg_idx) : va_arg(ap, double);
+ double f = (tvs
+ ? tv_float(tvs, &arg_idx)
+ : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
+ &arg_cur, fmt),
+ va_arg(ap, double)));
+
double abs_f = f < 0 ? -f : f;
if (fmt_spec == 'g' || fmt_spec == 'G') {
@@ -1234,7 +1957,6 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
assert(str_arg_l < sizeof(tmp));
if (remove_trailing_zeroes) {
- int i;
char *tp;
// using %g or %G: remove superfluous zeroes
@@ -1249,7 +1971,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
STRMOVE(tp + 1, tp + 2);
str_arg_l--;
}
- i = (tp[1] == '-') ? 2 : 1;
+ int i = (tp[1] == '-') ? 2 : 1;
while (tp[i] == '0') {
// change "1.0e07" to "1.0e7"
STRMOVE(tp + i, tp + i + 1);
@@ -1397,10 +2119,14 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t
str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
}
- if (tvs && tvs[arg_idx - 1].v_type != VAR_UNKNOWN) {
+ if (tvs != NULL
+ && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN) {
emsg(_("E767: Too many arguments to printf()"));
}
+ xfree(ap_types);
+ va_end(ap);
+
// return the number of characters formatted (excluding trailing nul
// character); that is, the number of characters that would have been
// written to the buffer if it were large enough.
@@ -1441,20 +2167,17 @@ int kv_do_printf(StringBuilder *str, const char *fmt, ...)
///
/// @return the allocated string.
char *reverse_text(char *s)
- FUNC_ATTR_NONNULL_RET
+ FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET
{
- // Reverse the pattern.
size_t len = strlen(s);
char *rev = xmalloc(len + 1);
- size_t rev_i = len;
- for (size_t s_i = 0; s_i < len; s_i++) {
+ for (size_t s_i = 0, rev_i = len; s_i < len; s_i++) {
const int mb_len = utfc_ptr2len(s + s_i);
rev_i -= (size_t)mb_len;
memmove(rev + rev_i, s + s_i, (size_t)mb_len);
s_i += (size_t)mb_len - 1;
}
rev[len] = NUL;
-
return rev;
}
@@ -1468,7 +2191,7 @@ char *reverse_text(char *s)
/// @return [allocated] Copy of the string.
char *strrep(const char *src, const char *what, const char *rep)
{
- char *pos = (char *)src;
+ const char *pos = src;
size_t whatlen = strlen(what);
// Count occurrences
@@ -1499,3 +2222,761 @@ char *strrep(const char *src, const char *what, const char *rep)
return ret;
}
+
+/// Shared worker behind "byteidx()" and "byteidxcomp()".
+///
+/// Stores in rettv the byte offset of the character with index argvars[1] in
+/// the string argvars[0].  The result is -1 when an argument is invalid or the
+/// string ends before the index is reached.
+///
+/// @param[in]   argvars  String, index and optional utf-16 flag.
+/// @param[out]  rettv    Receives the resulting number.
+/// @param[in]   comp     When non-zero step with utf_ptr2len(), otherwise
+///                       with utfc_ptr2len().
+static void byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
+{
+  rettv->vval.v_number = -1;
+
+  const char *const str = tv_get_string_chk(&argvars[0]);
+  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
+  if (str == NULL || idx < 0) {
+    return;
+  }
+
+  varnumber_T utf16idx = false;
+  if (argvars[2].v_type != VAR_UNKNOWN) {
+    bool failed = false;
+    utf16idx = tv_get_bool_chk(&argvars[2], &failed);
+    if (failed) {
+      return;
+    }
+    if (utf16idx < 0 || utf16idx > 1) {
+      semsg(_(e_using_number_as_bool_nr), utf16idx);
+      return;
+    }
+  }
+
+  int (*const char_len)(const char *) = comp ? utf_ptr2len : utfc_ptr2len;
+
+  const char *cur = str;
+  while (idx > 0) {
+    if (*cur == NUL) {  // Ran off the end of the string.
+      return;
+    }
+    if (utf16idx) {
+      const int nbytes = char_len(cur);
+      const int ch = (nbytes > 1) ? utf_ptr2char(cur) : *cur;
+      if (ch > 0xFFFF) {
+        // A character above U+FFFF consumes two units of the index.
+        idx--;
+      }
+    }
+    if (idx > 0) {
+      cur += char_len(cur);
+    }
+    idx--;
+  }
+  rettv->vval.v_number = (varnumber_T)(cur - str);
+}
+
+/// "byteidx()" function
+///
+/// Forwards to byteidx_common() with "comp" set to false.
+void f_byteidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ byteidx_common(argvars, rettv, false);
+}
+
+/// "byteidxcomp()" function
+///
+/// Forwards to byteidx_common() with "comp" set to true.
+void f_byteidxcomp(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ byteidx_common(argvars, rettv, true);
+}
+
+/// "charidx()" function
+///
+/// Sets rettv to a character index computed from the string argvars[0] and
+/// the index argvars[1].  The result stays -1 when an argument check fails or
+/// the index lies beyond the end of the string.
+///
+/// @param[in]   argvars  String, index, optional "countcc" flag and optional
+///                       "utf16" flag.
+/// @param[out]  rettv    Receives the resulting number.
+void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->vval.v_number = -1;
+
+ if (tv_check_for_string_arg(argvars, 0) == FAIL
+ || tv_check_for_number_arg(argvars, 1) == FAIL
+ || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
+ return;
+ }
+
+ const char *const str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
+ if (str == NULL || idx < 0) {
+ return;
+ }
+
+ // The fourth argument is only looked at when the third one is present.
+ varnumber_T countcc = false;
+ varnumber_T utf16idx = false;
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN) {
+ utf16idx = tv_get_bool(&argvars[3]);
+ }
+ }
+
+ // Pick the function used to step from one character to the next.
+ int (*ptr2len)(const char *);
+ if (countcc) {
+ ptr2len = utf_ptr2len;
+ } else {
+ ptr2len = utfc_ptr2len;
+ }
+
+ // Without the utf16 flag "idx" is compared against the byte position; with
+ // it "idx" is counted down once per UTF-16 code unit.
+ const char *p;
+ int len;
+ for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) {
+ if (*p == NUL) {
+ // If the index is exactly the number of bytes or utf-16 code units
+ // in the string then return the length of the string in characters.
+ if (utf16idx ? (idx == 0) : (p == (str + idx))) {
+ rettv->vval.v_number = len;
+ }
+ return;
+ }
+ if (utf16idx) {
+ idx--;
+ const int clen = ptr2len(p);
+ const int c = (clen > 1) ? utf_ptr2char(p) : *p;
+ if (c > 0xFFFF) {
+ // A character above U+FFFF uses up a second unit of the index.
+ idx--;
+ }
+ }
+ p += ptr2len(p);
+ }
+
+ // The loop counted one character past the one containing the index.
+ rettv->vval.v_number = len > 0 ? len - 1 : 0;
+}
+
+/// "str2list()" function
+///
+/// Fills the list returned in rettv with the values utf_ptr2char() yields
+/// while stepping through argvars[0] with utf_ptr2len().
+void f_str2list(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  tv_list_alloc_ret(rettv, kListLenUnknown);
+
+  const char *cur = tv_get_string(&argvars[0]);
+  while (*cur != NUL) {
+    tv_list_append_number(rettv->vval.v_list, utf_ptr2char(cur));
+    cur += utf_ptr2len(cur);
+  }
+}
+
+/// "str2nr()" function
+///
+/// Parses the string argvars[0] as a number and stores it in rettv.  The
+/// optional argvars[1] is the base (2, 8, 10 or 16; anything else is an
+/// error) and the optional argvars[2] enables STR2NR_QUOTE.
+void f_str2nr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  int base = 10;
+  int flags = 0;
+
+  if (argvars[1].v_type != VAR_UNKNOWN) {
+    base = (int)tv_get_number(&argvars[1]);
+    const bool base_ok = base == 2 || base == 8 || base == 10 || base == 16;
+    if (!base_ok) {
+      emsg(_(e_invarg));
+      return;
+    }
+    if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) {
+      flags |= STR2NR_QUOTE;
+    }
+  }
+
+  // Strip leading white space and one optional sign.
+  char *num = skipwhite(tv_get_string(&argvars[0]));
+  const bool negative = (*num == '-');
+  if (negative || *num == '+') {
+    num = skipwhite(num + 1);
+  }
+
+  // Base 10 needs no extra flags.
+  if (base == 2) {
+    flags |= STR2NR_BIN | STR2NR_FORCE;
+  } else if (base == 8) {
+    flags |= STR2NR_OCT | STR2NR_OOCT | STR2NR_FORCE;
+  } else if (base == 16) {
+    flags |= STR2NR_HEX | STR2NR_FORCE;
+  }
+
+  varnumber_T value;
+  vim_str2nr(num, NULL, NULL, flags, &value, NULL, 0, false, NULL);
+  // Text after the number is silently ignored.
+  rettv->vval.v_number = negative ? -value : value;
+}
+
+/// "strgetchar()" function
+///
+/// Sets rettv to the value utf_ptr2char() gives for the character with index
+/// argvars[1] in the string argvars[0], or to -1 when an argument is invalid
+/// or the index is outside the string.
+void f_strgetchar(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  rettv->vval.v_number = -1;
+
+  const char *const str = tv_get_string_chk(&argvars[0]);
+  if (str == NULL) {
+    return;
+  }
+  bool failed = false;
+  varnumber_T charidx = tv_get_number_chk(&argvars[1], &failed);
+  if (failed || charidx < 0) {
+    return;
+  }
+
+  const size_t len = strlen(str);
+  size_t off = 0;
+
+  // Step over the characters in front of the requested one.
+  for (; charidx > 0 && off < len; charidx--) {
+    off += (size_t)utf_ptr2len(str + off);
+  }
+
+  if (charidx == 0 && off < len) {
+    rettv->vval.v_number = utf_ptr2char(str + off);
+  }
+}
+
+/// "stridx()" function
+///
+/// Sets rettv to the byte offset of the first occurrence of argvars[1] in
+/// argvars[0], searching from the optional byte offset argvars[2].  The
+/// result is -1 when nothing is found, an argument is invalid, or the start
+/// offset is not inside the string.
+void f_stridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  rettv->vval.v_number = -1;
+
+  char buf[NUMBUFLEN];
+  const char *const needle = tv_get_string_chk(&argvars[1]);
+  const char *const haystack = tv_get_string_buf_chk(&argvars[0], buf);
+  if (needle == NULL || haystack == NULL) {
+    return;  // Type error; errmsg already given.
+  }
+
+  ptrdiff_t skip = 0;
+  if (argvars[2].v_type != VAR_UNKNOWN) {
+    bool failed = false;
+
+    const ptrdiff_t start_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2],
+                                                             &failed);
+    if (failed || start_idx >= (ptrdiff_t)strlen(haystack)) {
+      return;
+    }
+    // A negative start is treated like zero.
+    if (start_idx > 0) {
+      skip = start_idx;
+    }
+  }
+
+  const char *const hit = strstr(haystack + skip, needle);
+  if (hit == NULL) {
+    return;
+  }
+  rettv->vval.v_number = (varnumber_T)(hit - haystack);
+}
+
+/// "string()" function
+///
+/// Sets rettv to a VAR_STRING holding what encode_tv2string() returns for
+/// argvars[0].
+void f_string(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = encode_tv2string(&argvars[0], NULL);
+}
+
+/// "strlen()" function
+///
+/// Sets rettv to the length in bytes of the string argvars[0].
+void f_strlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  const char *const s = tv_get_string(&argvars[0]);
+
+  rettv->vval.v_number = (varnumber_T)strlen(s);
+}
+
+/// Counts the characters in the string argvars[0] and stores the count in
+/// rettv.
+///
+/// @param[in]   argvars  The string to measure is argvars[0].
+/// @param[out]  rettv    Receives the count.
+/// @param[in]   skipcc   When true step with mb_ptr2char_adv(), otherwise
+///                       with mb_cptr2char_adv().
+static void strchar_common(typval_T *argvars, typval_T *rettv, bool skipcc)
+{
+  int (*const advance)(const char **pp)
+    = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
+
+  varnumber_T count = 0;
+  for (const char *cur = tv_get_string(&argvars[0]); *cur != NUL; count++) {
+    advance(&cur);
+  }
+  rettv->vval.v_number = count;
+}
+
+/// "strcharlen()" function
+///
+/// Forwards to strchar_common() with "skipcc" set to true.
+void f_strcharlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ strchar_common(argvars, rettv, true);
+}
+
+/// "strchars()" function
+///
+/// Validates the optional boolean argvars[1] and passes it on as "skipcc" to
+/// strchar_common(), which stores the character count in rettv.
+void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  varnumber_T skipcc = false;
+
+  if (argvars[1].v_type != VAR_UNKNOWN) {
+    bool failed = false;
+    skipcc = tv_get_bool_chk(&argvars[1], &failed);
+    if (failed) {
+      return;
+    }
+    // Only 0 and 1 are accepted as a boolean here.
+    const bool in_range = skipcc >= 0 && skipcc <= 1;
+    if (!in_range) {
+      semsg(_(e_using_number_as_bool_nr), skipcc);
+      return;
+    }
+  }
+
+  strchar_common(argvars, rettv, skipcc);
+}
+
+/// "strutf16len()" function
+///
+/// Sets rettv to the number of UTF-16 code units needed for the string
+/// argvars[0]: two for every character above U+FFFF and one for any other.
+/// The optional boolean argvars[1] selects mb_cptr2char_adv() instead of
+/// mb_ptr2char_adv() for stepping.  The result is -1 when an argument check
+/// fails.
+void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  rettv->vval.v_number = -1;
+
+  if (tv_check_for_string_arg(argvars, 0) == FAIL
+      || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) {
+    return;
+  }
+
+  const varnumber_T countcc
+    = (argvars[1].v_type != VAR_UNKNOWN) ? tv_get_bool(&argvars[1]) : false;
+
+  int (*const advance)(const char **pp)
+    = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
+
+  varnumber_T units = 0;
+  const char *cur = tv_get_string(&argvars[0]);
+  while (*cur != NUL) {
+    const int ch = advance(&cur);
+    units += (ch > 0xFFFF) ? 2 : 1;
+  }
+  rettv->vval.v_number = units;
+}
+
+/// "strdisplaywidth()" function
+///
+/// Sets rettv to the result of linetabsize_col() for the string argvars[0]
+/// minus the start column.  The start column is the optional argvars[1] and
+/// defaults to zero.
+void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  const char *const s = tv_get_string(&argvars[0]);
+  const int col = (argvars[1].v_type != VAR_UNKNOWN)
+                  ? (int)tv_get_number(&argvars[1])
+                  : 0;
+
+  rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, (char *)s) - col);
+}
+
+/// "strwidth()" function
+///
+/// Sets rettv to what mb_string2cells() returns for the string argvars[0].
+void f_strwidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  rettv->vval.v_number = (varnumber_T)mb_string2cells(tv_get_string(&argvars[0]));
+}
+
+/// "strcharpart()" function
+///
+/// Sets rettv to a copy, made with xmemdupz(), of part of the string
+/// argvars[0].  argvars[1] is the start and the optional argvars[2] the
+/// length, both counted in characters.  When argvars[2] and argvars[3] are
+/// both given, argvars[3] is a boolean that selects utfc_ptr2len() instead of
+/// utf_ptr2len() for stepping through the string.
+void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ const char *const p = tv_get_string(&argvars[0]);
+ const size_t slen = strlen(p);
+
+ int nbyte = 0;
+ varnumber_T skipcc = false;
+ bool error = false;
+ varnumber_T nchar = tv_get_number_chk(&argvars[1], &error);
+ if (!error) {
+ if (argvars[2].v_type != VAR_UNKNOWN
+ && argvars[3].v_type != VAR_UNKNOWN) {
+ skipcc = tv_get_bool_chk(&argvars[3], &error);
+ if (error) {
+ return;
+ }
+ if (skipcc < 0 || skipcc > 1) {
+ semsg(_(e_using_number_as_bool_nr), skipcc);
+ return;
+ }
+ }
+
+ // Turn the start given in characters into a byte offset.
+ if (nchar > 0) {
+ while (nchar > 0 && (size_t)nbyte < slen) {
+ if (skipcc) {
+ nbyte += utfc_ptr2len(p + nbyte);
+ } else {
+ nbyte += utf_ptr2len(p + nbyte);
+ }
+ nchar--;
+ }
+ } else {
+ // A zero or negative start is kept as it is and clipped further down.
+ nbyte = (int)nchar;
+ }
+ }
+ int len = 0;
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ // Turn the length given in characters into a byte count.
+ int charlen = (int)tv_get_number(&argvars[2]);
+ while (charlen > 0 && nbyte + len < (int)slen) {
+ int off = nbyte + len;
+
+ if (off < 0) {
+ // Positions in front of the string count as one byte each.
+ len += 1;
+ } else {
+ if (skipcc) {
+ len += utfc_ptr2len(p + off);
+ } else {
+ len += utf_ptr2len(p + off);
+ }
+ }
+ charlen--;
+ }
+ } else {
+ len = (int)slen - nbyte; // default: all bytes that are available.
+ }
+
+ // Only return the overlap between the specified part and the actual
+ // string.
+ if (nbyte < 0) {
+ len += nbyte;
+ nbyte = 0;
+ } else if ((size_t)nbyte > slen) {
+ nbyte = (int)slen;
+ }
+ if (len < 0) {
+ len = 0;
+ } else if (nbyte + len > (int)slen) {
+ len = (int)slen - nbyte;
+ }
+
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = xmemdupz(p + nbyte, (size_t)len);
+}
+
+/// "strpart()" function
+///
+/// Sets rettv to a copy, made with xmemdupz(), of part of the string
+/// argvars[0].  argvars[1] is the start in bytes and the optional argvars[2]
+/// the length.  The length is in bytes unless argvars[3] is also given, in
+/// which case it is counted in characters using utfc_ptr2len().
+void f_strpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  const char *const p = tv_get_string(&argvars[0]);
+  const size_t slen = strlen(p);
+  const varnumber_T total = (varnumber_T)slen;
+  const bool has_len = argvars[2].v_type != VAR_UNKNOWN;
+
+  bool failed = false;
+  varnumber_T start = tv_get_number_chk(&argvars[1], &failed);
+  varnumber_T count = 0;
+  if (!failed) {
+    // Default count: all bytes that are available.
+    count = has_len ? tv_get_number(&argvars[2]) : total - start;
+  }
+
+  // Only return the overlap between the specified part and the actual
+  // string.
+  if (start < 0) {
+    count += start;
+    start = 0;
+  } else if (start > total) {
+    start = total;
+  }
+  if (count < 0) {
+    count = 0;
+  } else if (start + count > total) {
+    count = total - start;
+  }
+
+  if (has_len && argvars[3].v_type != VAR_UNKNOWN) {
+    // The count is in characters: walk that many characters and use the
+    // number of bytes covered instead.
+    int off = (int)start;
+    while (off < (int)slen && count > 0) {
+      off += utfc_ptr2len(p + off);
+      count--;
+    }
+    count = off - start;
+  }
+
+  rettv->v_type = VAR_STRING;
+  rettv->vval.v_string = xmemdupz(p + start, (size_t)count);
+}
+
+/// "strridx()" function
+///
+/// Sets rettv to the byte offset of the last occurrence of argvars[1] in
+/// argvars[0] that starts at or before the optional upper limit argvars[2].
+/// The result is -1 when there is no such match, an argument is invalid, or
+/// the limit is negative.  An empty needle yields the limit itself, which
+/// defaults to the length of the haystack.
+///
+/// Positions are compared as offsets rather than as pointers, so that a limit
+/// larger than the haystack never forms a pointer outside of the string.
+void f_strridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  char buf[NUMBUFLEN];
+  const char *const needle = tv_get_string_chk(&argvars[1]);
+  const char *const haystack = tv_get_string_buf_chk(&argvars[0], buf);
+
+  rettv->vval.v_number = -1;
+  if (needle == NULL || haystack == NULL) {
+    return;  // Type error; errmsg already given.
+  }
+
+  const size_t haystack_len = strlen(haystack);
+  ptrdiff_t end_idx;
+  if (argvars[2].v_type != VAR_UNKNOWN) {
+    // Third argument: upper limit for index.
+    end_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], NULL);
+    if (end_idx < 0) {
+      return;  // Can never find a match.
+    }
+  } else {
+    end_idx = (ptrdiff_t)haystack_len;
+  }
+
+  if (*needle == NUL) {
+    // Empty string matches past the end: the answer is the limit itself.
+    rettv->vval.v_number = (varnumber_T)end_idx;
+    return;
+  }
+
+  const char *lastmatch = NULL;
+  for (const char *rest = haystack; *rest != NUL; rest++) {
+    rest = strstr(rest, needle);
+    if (rest == NULL || rest - haystack > end_idx) {
+      break;
+    }
+    lastmatch = rest;
+  }
+
+  if (lastmatch != NULL) {
+    rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
+  }
+}
+
+/// "strtrans()" function
+///
+/// Sets rettv to a VAR_STRING holding what transstr() returns for the string
+/// argvars[0].
+void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
+}
+
+/// "utf16idx()" function
+///
+/// Converts a byte or character offset in a string to the corresponding UTF-16
+/// code unit offset.
+///
+/// The result stays -1 when an argument check fails or the offset lies beyond
+/// the end of the string.
+///
+/// @param[in]   argvars  String, offset, optional "countcc" flag and optional
+///                       "charidx" flag.
+/// @param[out]  rettv    Receives the resulting number.
+void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->vval.v_number = -1;
+
+ if (tv_check_for_string_arg(argvars, 0) == FAIL
+ || tv_check_for_opt_number_arg(argvars, 1) == FAIL
+ || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
+ return;
+ }
+
+ const char *const str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
+ if (str == NULL || idx < 0) {
+ return;
+ }
+
+ // The fourth argument is only looked at when the third one is present.
+ varnumber_T countcc = false;
+ varnumber_T charidx = false;
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN) {
+ charidx = tv_get_bool(&argvars[3]);
+ }
+ }
+
+ // Pick the function used to step from one character to the next.
+ int (*ptr2len)(const char *);
+ if (countcc) {
+ ptr2len = utf_ptr2len;
+ } else {
+ ptr2len = utfc_ptr2len;
+ }
+
+ // With the charidx flag "idx" is counted down once per character, otherwise
+ // it is compared against the byte position.
+ const char *p;
+ int len;
+ int utf16idx = 0;
+ for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) {
+ if (*p == NUL) {
+ // If the index is exactly the number of bytes or characters in the
+ // string then return the length of the string in utf-16 code units.
+ if (charidx ? (idx == 0) : (p == (str + idx))) {
+ rettv->vval.v_number = len;
+ }
+ return;
+ }
+ // Remember the code unit offset at which this character starts.
+ utf16idx = len;
+ const int clen = ptr2len(p);
+ const int c = (clen > 1) ? utf_ptr2char(p) : *p;
+ if (c > 0xFFFF) {
+ // A character above U+FFFF adds a second code unit.
+ len++;
+ }
+ p += ptr2len(p);
+ if (charidx) {
+ idx--;
+ }
+ }
+
+ rettv->vval.v_number = utf16idx;
+}
+
+/// "tolower(string)" function
+///
+/// Sets rettv to a VAR_STRING holding what strcase_save() returns for the
+/// string argvars[0] when its second argument is false.
+void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), false);
+}
+
+/// "toupper(string)" function
+///
+/// Sets rettv to a VAR_STRING holding what strcase_save() returns for the
+/// string argvars[0] when its second argument is true.
+void f_toupper(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), true);
+}
+
+/// "tr(string, fromstr, tostr)" function
+///
+/// Sets rettv to a copy of argvars[0] in which every character that occurs in
+/// fromstr is replaced by the character at the same position in tostr.
+/// Characters are delimited with utfc_ptr2len().  When a length mismatch
+/// between fromstr and tostr is detected an error is given and the result
+/// string is left NULL.
+void f_tr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ char buf[NUMBUFLEN];
+ char buf2[NUMBUFLEN];
+
+ const char *in_str = tv_get_string(&argvars[0]);
+ const char *fromstr = tv_get_string_buf_chk(&argvars[1], buf);
+ const char *tostr = tv_get_string_buf_chk(&argvars[2], buf2);
+
+ // Default return value: empty string.
+ rettv->v_type = VAR_STRING;
+ rettv->vval.v_string = NULL;
+ if (fromstr == NULL || tostr == NULL) {
+ return; // Type error; errmsg already given.
+ }
+ garray_T ga;
+ ga_init(&ga, (int)sizeof(char), 80);
+
+ // fromstr and tostr have to contain the same number of chars.
+ bool first = true;
+ while (*in_str != NUL) {
+ // Unless a replacement is found below, the input character is copied.
+ const char *cpstr = in_str;
+ const int inlen = utfc_ptr2len(in_str);
+ int cplen = inlen;
+ // Number of fromstr characters stepped over so far.
+ int idx = 0;
+ int fromlen;
+ for (const char *p = fromstr; *p != NUL; p += fromlen) {
+ fromlen = utfc_ptr2len(p);
+ if (fromlen == inlen && strncmp(in_str, p, (size_t)inlen) == 0) {
+ // Found in fromstr: walk tostr to the character at the same position.
+ int tolen;
+ for (p = tostr; *p != NUL; p += tolen) {
+ tolen = utfc_ptr2len(p);
+ if (idx-- == 0) {
+ cplen = tolen;
+ cpstr = p;
+ break;
+ }
+ }
+ if (*p == NUL) { // tostr is shorter than fromstr.
+ goto error;
+ }
+ break;
+ }
+ idx++;
+ }
+
+ if (first && cpstr == in_str) {
+ // Check that fromstr and tostr have the same number of
+ // (multi-byte) characters. Done only once when a character
+ // of in_str doesn't appear in fromstr.
+ first = false;
+ int tolen;
+ for (const char *p = tostr; *p != NUL; p += tolen) {
+ tolen = utfc_ptr2len(p);
+ idx--;
+ }
+ if (idx != 0) {
+ goto error;
+ }
+ }
+
+ // Append the chosen character to the result.
+ ga_grow(&ga, cplen);
+ memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
+ ga.ga_len += cplen;
+
+ in_str += inlen;
+ }
+
+ // add a terminating NUL
+ ga_append(&ga, NUL);
+
+ rettv->vval.v_string = ga.ga_data;
+ return;
+error:
+ // Report the problem and drop the partial result.
+ semsg(_(e_invarg2), fromstr);
+ ga_clear(&ga);
+}
+
+/// Decides whether "trim()" removes the character "c".
+///
+/// @param[in]  c     Character to test, as returned by utf_ptr2char().
+/// @param[in]  mask  Characters to remove, or NULL for the default set:
+///                   everything up to and including a space, plus 0xa0.
+///
+/// @return true when "c" is to be removed.
+static bool trim_strips_char(const int c, const char *const mask)
+{
+  if (mask == NULL) {
+    return c <= ' ' || c == 0xa0;
+  }
+  for (const char *p = mask; *p != NUL; MB_PTR_ADV(p)) {
+    if (c == utf_ptr2char(p)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// "trim({expr})" function
+///
+/// Sets rettv to a copy of the string argvars[0] with characters removed from
+/// its ends.  The optional string argvars[1] lists the characters to remove;
+/// when it is absent or empty the default set is used.  The optional number
+/// argvars[2] selects the direction: 0 trims both ends, 1 only the start and
+/// 2 only the end; any other value is an error.  On an argument error the
+/// result string is left NULL.
+void f_trim(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  char buf1[NUMBUFLEN];
+  char buf2[NUMBUFLEN];
+  const char *head = tv_get_string_buf_chk(&argvars[0], buf1);
+  const char *mask = NULL;
+  int dir = 0;
+
+  rettv->v_type = VAR_STRING;
+  rettv->vval.v_string = NULL;
+  if (head == NULL) {
+    return;
+  }
+
+  if (tv_check_for_opt_string_arg(argvars, 1) == FAIL) {
+    return;
+  }
+
+  if (argvars[1].v_type == VAR_STRING) {
+    mask = tv_get_string_buf_chk(&argvars[1], buf2);
+    if (mask == NULL) {
+      // Defensive: the "_chk" getter reports failure with NULL.
+      return;
+    }
+    if (*mask == NUL) {
+      mask = NULL;
+    }
+
+    if (argvars[2].v_type != VAR_UNKNOWN) {
+      bool error = false;
+      // leading or trailing characters to trim
+      dir = (int)tv_get_number_chk(&argvars[2], &error);
+      if (error) {
+        return;
+      }
+      if (dir < 0 || dir > 2) {
+        semsg(_(e_invarg2), tv_get_string(&argvars[2]));
+        return;
+      }
+    }
+  }
+
+  if (dir == 0 || dir == 1) {
+    // Trim leading characters
+    while (*head != NUL && trim_strips_char(utf_ptr2char(head), mask)) {
+      MB_PTR_ADV(head);
+    }
+  }
+
+  const char *tail = head + strlen(head);
+  if (dir == 0 || dir == 2) {
+    // Trim trailing characters
+    while (tail > head) {
+      const char *prev = tail;
+      MB_PTR_BACK(head, prev);
+      if (!trim_strips_char(utf_ptr2char(prev), mask)) {
+        break;
+      }
+      tail = prev;
+    }
+  }
+  rettv->vval.v_string = xstrnsave(head, (size_t)(tail - head));
+}