aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBjörn Linse <bjorn.linse@gmail.com>2019-08-04 12:22:22 +0200
committerBjörn Linse <bjorn.linse@gmail.com>2019-08-06 20:24:36 +0200
commitc0993ed3433ef4111a39e59642d15b15261e8b68 (patch)
treecb2a4225d70dec491f96412f96ebcb7c5a391e9a /src
parentb0e26199ec02c9b392af6161522004c55db0441f (diff)
downloadrneovim-c0993ed3433ef4111a39e59642d15b15261e8b68.tar.gz
rneovim-c0993ed3433ef4111a39e59642d15b15261e8b68.tar.bz2
rneovim-c0993ed3433ef4111a39e59642d15b15261e8b68.zip
lua: support getting UTF-32 and UTF-16 sizes of replaced text
Diffstat (limited to 'src')
-rw-r--r--src/nvim/api/buffer.c11
-rw-r--r--src/nvim/buffer_defs.h17
-rw-r--r--src/nvim/buffer_updates.c16
-rw-r--r--src/nvim/fileio.c2
-rw-r--r--src/nvim/globals.h2
-rw-r--r--src/nvim/mbyte.c33
-rw-r--r--src/nvim/memline.c48
-rw-r--r--src/nvim/misc1.c4
8 files changed, 113 insertions, 20 deletions
diff --git a/src/nvim/api/buffer.c b/src/nvim/api/buffer.c
index 497b4ae9a4..c6f82e9d85 100644
--- a/src/nvim/api/buffer.c
+++ b/src/nvim/api/buffer.c
@@ -109,9 +109,11 @@ String buffer_get_line(Buffer buffer, Integer index, Error *err)
/// `nvim_buf_lines_event`. Otherwise, the first notification will be
/// a `nvim_buf_changedtick_event`. Not used for lua callbacks.
/// @param opts Optional parameters.
-/// `on_lines`: lua callback received on change.
+/// `on_lines`: lua callback received on change.
/// `on_changedtick`: lua callback received on changedtick
/// increment without text change.
+/// `utf_sizes`: include UTF-32 and UTF-16 size of
+/// the replaced region.
/// See |api-buffer-updates-lua| for more information
/// @param[out] err Error details, if any
/// @return False when updates couldn't be enabled because the buffer isn't
@@ -156,6 +158,12 @@ Boolean nvim_buf_attach(uint64_t channel_id,
}
cb.on_detach = v->data.luaref;
v->data.integer = LUA_NOREF;
+ } else if (is_lua && strequal("utf_sizes", k.data)) {
+ if (v->type != kObjectTypeBoolean) {
+ api_set_error(err, kErrorTypeValidation, "utf_sizes must be boolean");
+ goto error;
+ }
+ cb.utf_sizes = v->data.boolean;
} else {
api_set_error(err, kErrorTypeValidation, "unexpected key: %s", k.data);
goto error;
@@ -1196,6 +1204,7 @@ Dictionary nvim__buf_stats(Buffer buffer, Error *err)
// NB: this should be zero at any time API functions are called,
// this exists to debug issues
PUT(rv, "dirty_bytes", INTEGER_OBJ((Integer)buf->deleted_bytes));
+
return rv;
}
diff --git a/src/nvim/buffer_defs.h b/src/nvim/buffer_defs.h
index eb26e4ad8e..b11eaefdd0 100644
--- a/src/nvim/buffer_defs.h
+++ b/src/nvim/buffer_defs.h
@@ -459,8 +459,9 @@ typedef struct {
LuaRef on_lines;
LuaRef on_changedtick;
LuaRef on_detach;
+ bool utf_sizes;
} BufUpdateCallbacks;
-#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF }
+#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF, false }
#define BUF_HAS_QF_ENTRY 1
#define BUF_HAS_LL_ENTRY 2
@@ -802,12 +803,24 @@ struct file_buffer {
kvec_t(BufhlLine *) b_bufhl_move_space; // temporary space for highlights
- // array of channelids which have asked to receive updates for this
+ // array of channel ids which have asked to receive updates for this
// buffer.
kvec_t(uint64_t) update_channels;
+ // array of lua callbacks for buffer updates.
kvec_t(BufUpdateCallbacks) update_callbacks;
+ // whether an update callback has requested codepoint size of deleted regions.
+ bool update_need_codepoints;
+
+ // Measurements of the deleted or replaced region since the last update
+ // event. Some consumers of buffer changes need to know the byte size (like
+ // tree-sitter) or the corresponding UTF-32/UTF-16 size (like LSP) of the
+ // deleted text.
size_t deleted_bytes;
+ size_t deleted_codepoints;
+ size_t deleted_codeunits;
+
+ // The number of times the current line has been flushed in the memline.
int flush_count;
int b_diff_failed; // internal diff failed for this buffer
diff --git a/src/nvim/buffer_updates.c b/src/nvim/buffer_updates.c
index 7dea8bfac5..3604578b50 100644
--- a/src/nvim/buffer_updates.c
+++ b/src/nvim/buffer_updates.c
@@ -26,6 +26,9 @@ bool buf_updates_register(buf_T *buf, uint64_t channel_id,
if (channel_id == LUA_INTERNAL_CALL) {
kv_push(buf->update_callbacks, cb);
+ if (cb.utf_sizes) {
+ buf->update_need_codepoints = true;
+ }
return true;
}
@@ -169,7 +172,9 @@ void buf_updates_send_changes(buf_T *buf,
int64_t num_removed,
bool send_tick)
{
- size_t deleted_bytes = ml_flush_deleted_bytes(buf);
+ size_t deleted_codepoints, deleted_codeunits;
+ size_t deleted_bytes = ml_flush_deleted_bytes(buf, &deleted_codepoints,
+ &deleted_codeunits);
if (!buf_updates_active(buf)) {
return;
@@ -233,8 +238,8 @@ void buf_updates_send_changes(buf_T *buf,
bool keep = true;
if (cb.on_lines != LUA_NOREF) {
Array args = ARRAY_DICT_INIT;
- Object items[6];
- args.size = 6;
+ Object items[8];
+ args.size = 6; // may be increased to 8 below
args.items = items;
// the first argument is always the buffer handle
@@ -254,6 +259,11 @@ void buf_updates_send_changes(buf_T *buf,
// byte count of previous contents
args.items[5] = INTEGER_OBJ((Integer)deleted_bytes);
+ if (cb.utf_sizes) {
+ args.size = 8;
+ args.items[6] = INTEGER_OBJ((Integer)deleted_codepoints);
+ args.items[7] = INTEGER_OBJ((Integer)deleted_codeunits);
+ }
textlock++;
Object res = executor_exec_lua_cb(cb.on_lines, "lines", args, true);
textlock--;
diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c
index 2232de8c1e..d03b9138d0 100644
--- a/src/nvim/fileio.c
+++ b/src/nvim/fileio.c
@@ -1756,6 +1756,8 @@ failed:
linecnt--;
}
curbuf->deleted_bytes = 0;
+ curbuf->deleted_codepoints = 0;
+ curbuf->deleted_codeunits = 0;
linecnt = curbuf->b_ml.ml_line_count - linecnt;
if (filesize == 0)
linecnt = 0;
diff --git a/src/nvim/globals.h b/src/nvim/globals.h
index de6f59b3f1..4524c4b2c0 100644
--- a/src/nvim/globals.h
+++ b/src/nvim/globals.h
@@ -627,6 +627,8 @@ EXTERN pos_T Insstart_orig;
EXTERN int orig_line_count INIT(= 0); /* Line count when "gR" started */
EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */
+// nonzero during internal delete/replace: deleted text is then not counted
+EXTERN int inhibit_delete_count INIT(= 0);
/*
* These flags are set based upon 'fileencoding'.
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index e7579399f3..bf8ce46113 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1438,6 +1438,39 @@ int utf16_to_utf8(const wchar_t *strw, char **str)
#endif
+/// Measure the length of a string in corresponding UTF-32 and UTF-16 units.
+///
+/// Invalid UTF-8 bytes, or embedded surrogates, count as one code point/unit
+/// each.
+///
+/// The out parameters are incremented. This is used to measure the size of
+/// a buffer region consisting of multiple line segments.
+///
+/// @param s the string
+/// @param len maximum length (an earlier NUL terminates)
+/// @param[in,out] codepoints incremented by the UTF-32 code point count
+/// @param[in,out] codeunits incremented by the UTF-16 code unit count
+void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
+ size_t *codeunits)
+ FUNC_ATTR_NONNULL_ALL
+{
+ size_t count = 0, extra = 0;
+ size_t clen;
+ for (size_t i = 0; i < len && s[i] != NUL; i += clen) {
+ clen = utf_ptr2len_len(s+i, len-i);
+ // NB: this yields the byte value for bytes of an invalid sequence;
+ // we only care whether the char fits in the BMP or not.
+ int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
+ count++;
+ if (c > 0xFFFF) {
+ extra++;
+ }
+ }
+ *codepoints += count;
+ *codeunits += count + extra;
+}
+
+
/*
* Version of strnicmp() that handles multi-byte characters.
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can
diff --git a/src/nvim/memline.c b/src/nvim/memline.c
index 0b16f86416..3220c7d9b8 100644
--- a/src/nvim/memline.c
+++ b/src/nvim/memline.c
@@ -2383,6 +2383,23 @@ static int ml_append_int(
return OK;
}
+void ml_add_deleted_len(char_u *ptr, ssize_t len)
+{
+ if (inhibit_delete_count) {
+ return;
+ }
+ if (len == -1) {
+ len = STRLEN(ptr);
+ }
+ curbuf->deleted_bytes += len+1;
+ if (curbuf->update_need_codepoints) {
+ mb_utflen(ptr, len, &curbuf->deleted_codepoints,
+ &curbuf->deleted_codeunits);
+ curbuf->deleted_codepoints++; // NL char
+ curbuf->deleted_codeunits++;
+ }
+}
+
/*
* Replace line lnum, with buffering, in current buffer.
*
@@ -2408,19 +2425,17 @@ int ml_replace(linenr_T lnum, char_u *line, bool copy)
if (copy) {
line = vim_strsave(line);
}
- if (curbuf->b_ml.ml_line_lnum != lnum) { /* other line buffered */
- ml_flush_line(curbuf); /* flush it */
- } else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) { /* same line allocated */
- // TODO FIXME: see other "TODO FIXME"
- curbuf->deleted_bytes += STRLEN(curbuf->b_ml.ml_line_ptr)+1;
- xfree(curbuf->b_ml.ml_line_ptr); /* free it */
- readlen = false; // already read it.
+ if (curbuf->b_ml.ml_line_lnum != lnum) { // other line buffered
+ ml_flush_line(curbuf); // flush it
+ } else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) { // same line allocated
+ ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, -1);
+ readlen = false; // already added the length
+
+ xfree(curbuf->b_ml.ml_line_ptr); // free it
}
- if (readlen) {
- if (true) { // TODO: buffer updates active
- curbuf->deleted_bytes += STRLEN(ml_get_buf(curbuf, lnum, false))+1;
- }
+ if (readlen && kv_size(curbuf->update_callbacks)) {
+ ml_add_deleted_len(ml_get_buf(curbuf, lnum, false), -1);
}
curbuf->b_ml.ml_line_ptr = line;
@@ -2504,7 +2519,10 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
else
line_size = ((dp->db_index[idx - 1]) & DB_INDEX_MASK) - line_start;
- buf->deleted_bytes += line_size;
+ // Line should always have an NL char internally (represented as NUL),
+ // even if 'noeol' is set.
+ assert(line_size >= 1);
+ ml_add_deleted_len((char_u *)dp + line_start, line_size-1);
/*
* special case: If there is only one line in the data block it becomes empty.
@@ -2690,10 +2708,14 @@ void ml_clearmarked(void)
return;
}
-size_t ml_flush_deleted_bytes(buf_T *buf)
+size_t ml_flush_deleted_bytes(buf_T *buf, size_t *codepoints, size_t *codeunits)
{
size_t ret = buf->deleted_bytes;
+ *codepoints = buf->deleted_codepoints;
+ *codeunits = buf->deleted_codeunits;
buf->deleted_bytes = 0;
+ buf->deleted_codepoints = 0;
+ buf->deleted_codeunits = 0;
return ret;
}
diff --git a/src/nvim/misc1.c b/src/nvim/misc1.c
index 112ca6f287..a62fa6d585 100644
--- a/src/nvim/misc1.c
+++ b/src/nvim/misc1.c
@@ -780,6 +780,7 @@ open_line (
did_append = FALSE;
}
+ inhibit_delete_count++;
if (newindent
|| did_si
) {
@@ -821,6 +822,7 @@ open_line (
did_si = false;
}
}
+ inhibit_delete_count--;
/*
* In REPLACE mode, for each character in the extra leader, there must be
@@ -1685,7 +1687,7 @@ int del_bytes(colnr_T count, bool fixpos_arg, bool use_delcombine)
bool was_alloced = ml_line_alloced(); // check if oldp was allocated
char_u *newp;
if (was_alloced) {
- curbuf->deleted_bytes += (size_t)oldlen+1;
+ ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, oldlen);
newp = oldp; // use same allocated memory
} else { // need to allocate a new line
newp = xmalloc((size_t)(oldlen + 1 - count));