From c11a6613625008c7d45702301cdf404873674c58 Mon Sep 17 00:00:00 2001 From: TJ DeVries Date: Tue, 1 Sep 2020 22:00:55 -0400 Subject: [PATCH] feat: new fuzzy sorter --- README.md | 81 ++++++++++++---------- lua/telescope/builtin.lua | 4 +- lua/telescope/pickers.lua | 2 + lua/telescope/sorters.lua | 131 ++++++++++++++++++++++++++++++++++- lua/tests/telescope_spec.lua | 20 ++++-- scratch/test_fuzzy_file.lua | 20 ++++++ 6 files changed, 212 insertions(+), 46 deletions(-) create mode 100644 scratch/test_fuzzy_file.lua diff --git a/README.md b/README.md index 1f2fb30..fef57e2 100644 --- a/README.md +++ b/README.md @@ -63,46 +63,46 @@ wrappers over common tasks). ```lua require'telescope.builtin'.git_files{ - -- See Picker for additional options - show_preview = true, -- Show preview - prompt = "Git File", - selection_strategy = "reset" -- follow, reset, line + -- See Picker for additional options + show_preview = true, -- Show preview + prompt = "Git File", + selection_strategy = "reset" -- follow, reset, line } ``` ```lua require'telescope.builtin'.live_grep{ - -- See Picker for additional options - prompt = "Live Grep", + -- See Picker for additional options + prompt = "Live Grep", } ``` ```lua require'telescope.builtin'.lsp_references{ - -- See Picker for additional options - prompt = 'LSP References' + -- See Picker for additional options + prompt = 'LSP References' } ``` ```lua require'telescope.builtin'.quickfix{ - -- See Picker for additional options - prompt = 'Quickfix' + -- See Picker for additional options + prompt = 'Quickfix' } ``` ```lua require'telescope.builtin'.grep_string{ - -- See Picker for additional options - prompt = 'Find Word', - search = false -- Search term or + -- See Picker for additional options + prompt = 'Find Word', + search = false -- Search term or } ``` ```lua require'telescope.builtin'.oldfiles{ - -- See Picker for additional options - prompt = 'Oldfiles', + -- See Picker for additional options + prompt = 'Oldfiles', } ``` @@ -121,15 +121,20 @@ require'telescope.builtin'.oldfiles{ ```lua -- lua/telescope/finders.lua Finder:new{ - entry_maker = function(line) end, - fn_command = function() { command = "", args = { "ls-files" } } end, - static = false, - maximum_results = false + entry_maker = function(line) end, + fn_command = function() { command = "", args = { "ls-files" } } end, + static = false, + maximum_results = false } ``` + `Sorter`: - A `Sorter` is called by the `Picker` on each item returned by the `Finder`. -- `Sorter`s return a number, which is equivalent to the "distance" between two +- `Sorter`s return a number, which is equivalent to the "distance" between the current `prompt` and the `entry` returned by a `finder`. + - Currently, it's not possible to delay calling the `Sorter` until the end of the execution, it is called on each item as we receive them. + - This was done this way so that long running / expensive searches can be instantly searchable and we don't have to wait til it completes for things to start being worked on. + - However, this prevents using some tools, like FZF easily. + - In the future, I'll probably add a mode where you can delay the sorting til the end, so you can use more traditional sorting tools. "picker": @@ -145,25 +150,25 @@ Defaults: ```lua -- lua/telescope/pickers.lua Picker:new{ - prompt = "Git Files", -- REQUIRED - finder = FUNCTION, -- REQUIRED - sorter = FUNCTION, -- REQUIRED - previewer = FUNCTION, -- REQUIRED - mappings = { - i = { - [""] = require'telescope.actions'.move_selection_next, - [""] = require'telescope.actions'.move_selection_previous, - [""] = require'telescope.actions'.goto_file_selection, - }, + prompt = "Git Files", -- REQUIRED + finder = FUNCTION, -- REQUIRED + sorter = FUNCTION, -- REQUIRED + previewer = FUNCTION, -- REQUIRED + mappings = { + i = { + [""] = require'telescope.actions'.move_selection_next, + [""] = require'telescope.actions'.move_selection_previous, + [""] = require'telescope.actions'.goto_file_selection, + }, - n = { - [""] = require'telescope.actions'.close, - } - }, - selection_strategy = "reset", -- follow, reset, line - border = {}, - borderchars = { '─', '│', '─', '│', '┌', '┐', '┘', '└'}, - preview_cutoff = 120 + n = { + [""] = require'telescope.actions'.close, + } + }, + selection_strategy = "reset", -- follow, reset, line + border = {}, + borderchars = { '─', '│', '─', '│', '┌', '┐', '┘', '└'}, + preview_cutoff = 120 } ``` diff --git a/lua/telescope/builtin.lua b/lua/telescope/builtin.lua index 90fc70f..f6b665e 100644 --- a/lua/telescope/builtin.lua +++ b/lua/telescope/builtin.lua @@ -29,12 +29,14 @@ builtin.git_files = function(opts) end) or nil + pickers.new(opts, { prompt = 'Git File', finder = finders.new_oneshot_job({ "git", "ls-files" }, make_entry), previewer = previewers.cat, - sorter = sorters.get_norcalli_sorter(), + sorter = sorters.get_fuzzy_file(), }):find() + end builtin.live_grep = function(opts) diff --git a/lua/telescope/pickers.lua b/lua/telescope/pickers.lua index 2650ff0..4b450f2 100644 --- a/lua/telescope/pickers.lua +++ b/lua/telescope/pickers.lua @@ -276,6 +276,8 @@ function Picker:find() end local process_complete = vim.schedule_wrap(function() + -- TODO: We should either: always leave one result or make sure we actually clean up the results when nothing matches + if selection_strategy == 'row' then self:set_selection(self:get_selection_row()) elseif selection_strategy == 'follow' then diff --git a/lua/telescope/sorters.lua b/lua/telescope/sorters.lua index b8ee599..6074f1b 100644 --- a/lua/telescope/sorters.lua +++ b/lua/telescope/sorters.lua @@ -73,13 +73,138 @@ end -- TODO: Match on upper case words -- TODO: Match on last match -sorters.get_fuzzy_file = function() - local cached_tails = {} +sorters.get_fuzzy_file = function(opts) + opts = opts or {} + + local ngram_len = opts.ngram_len or 2 + local os_sep = '/' + + local cached_tails = setmetatable({}, { + __index = function(t, k) + local tail_split = vim.split(k, os_sep) + local tail = tail_split[#tail_split] + + rawset(t, k, tail) + return tail + end, + }) + + local cached_uppers = setmetatable({}, { + __index = function(t, k) + local obj = {} + for i = 1, #k do + local s = k:sub(i, i) + local s_byte = s:byte() + if s_byte <= 90 and s_byte >= 65 then + obj[s] = true + end + end + + rawset(t, k, obj) + return obj + end + }) + local cached_ngrams = {} + local function overlapping_ngrams(s, n) + if cached_ngrams[s] and cached_ngrams[s][n] then + return cached_ngrams[s][n] + end + + local R = {} + for i = 1, s:len() - n + 1 do + R[#R+1] = s:sub(i, i+n-1) + end + + if not cached_ngrams[s] then + cached_ngrams[s] = {} + end + + cached_ngrams[s][n] = R + + return R + end + return Sorter:new { scoring_function = function(_, prompt, line) - return 1 + local N = #prompt + + if prompt == 0 or N < ngram_len then + -- TODO: If the character is in the line, + -- then it should get a point or somethin. + return 0 + end + + local prompt_lower = prompt:lower() + local line_lower = line:lower() + + local prompt_lower_ngrams = overlapping_ngrams(prompt_lower, ngram_len) + + -- Contains the original string + local contains_string = line_lower:find(prompt_lower, 1, true) + + local prompt_uppers = cached_uppers[prompt] + local line_uppers = cached_uppers[line] + + local uppers_matching = 0 + for k, _ in pairs(prompt_uppers) do + if line_uppers[k] then + uppers_matching = uppers_matching + 1 + end + end + + -- TODO: Consider case senstivity + local tail = cached_tails[line_lower] + local contains_tail = tail:find(prompt, 1, true) + + local consecutive_matches = 0 + local previous_match_index = 0 + local match_count = 0 + + for i = 1, #prompt_lower_ngrams do + local match_start = line_lower:find(prompt_lower_ngrams[i], 1, true) + if match_start then + match_count = match_count + 1 + if match_start > previous_match_index then + consecutive_matches = consecutive_matches + 1 + end + + previous_match_index = match_start + end + end + + local tail_modifier = 1 + if contains_tail then + tail_modifier = 2 + end + + -- TODO: Copied from ashkan. + local denominator = ( + (10 * match_count / #prompt_lower_ngrams) + -- biases for shorter strings + -- TODO(ashkan): this can bias towards repeated finds of the same + -- subpattern with overlapping_ngrams + + 3 * match_count * ngram_len / #line + + consecutive_matches + + N / (contains_string or (2 * #line)) + -- + 30/(c1 or 2*N) + + -- TODO: It might be possible that this too strongly correlates, + -- but it's unlikely for people to type capital letters without actually + -- wanting to do something with a capital letter in it. + + uppers_matching + ) * tail_modifier + + if denominator == 0 or denominator ~= denominator then + return -1 + end + + if #prompt > 2 and denominator < 0.5 then + return -1 + end + + return 1 / denominator end } end diff --git a/lua/tests/telescope_spec.lua b/lua/tests/telescope_spec.lua index 8d20c5d..4579902 100644 --- a/lua/tests/telescope_spec.lua +++ b/lua/tests/telescope_spec.lua @@ -237,13 +237,25 @@ describe('Sorters', function() it('sort matches well', function() local sorter = require('telescope.sorters').get_fuzzy_file() - local exact_match = sorter:score('hello', 'hello') + local exact_match = sorter:score('abcdef', 'abcdef') local no_match = sorter:score('abcdef', 'ghijkl') local ok_match = sorter:score('abcdef', 'ab') - assert(exact_match < no_match) - assert(exact_match < ok_match) - assert(ok_match < no_match) + assert( + exact_match < no_match, + string.format("Exact match better than no match: %s %s", exact_match, no_match) + ) + assert(exact_match < ok_match, "Exact match better than OK match") + assert(ok_match < no_match, "OK match better than no match") + end) + + it('sorts matches after last os sep better', function() + local sorter = require('telescope.sorters').get_fuzzy_file() + + local exact_match = sorter:score('aaa/bbb', 'aaa') + local ok_match = sorter:score('bbb/aaa', 'aaa') + + assert(exact_match < ok_match, "Exact match better than OK match") end) -- it('sorts multiple finds better', function() diff --git a/scratch/test_fuzzy_file.lua b/scratch/test_fuzzy_file.lua new file mode 100644 index 0000000..85d083a --- /dev/null +++ b/scratch/test_fuzzy_file.lua @@ -0,0 +1,20 @@ +RELOAD('telescope') + +local sorter = require('telescope.sorters').get_fuzzy_file() + +-- Test for tail. +assert(sorter:score("aaa", "aaa/bbb") > sorter:score("aaa", "bbb/aaa")) +assert( + sorter:score("path", "/path/to/directory/file.txt") + > sorter:score("path", "/file/to/directory/path.txt") +) + +-- Matches well for UpperCase (basically just bonus points for having uppercase letters) +assert(sorter:score("AAA", "/blah/this/aaa/that") > sorter:score("AAA", "/blah/this/AAA/that")) + +-- TODO: Determine our strategy for these +-- TODO: Make it so that capital letters count extra for being after a path sep. +-- assert(sorter:score("ftp", "/folder/to/python") > sorter:score("FTP", "/folder/to/python")) + +-- TODO: Make it so that +-- assert(sorter:score("build", "/home/tj/build/neovim") > sorter:score("htbn", "/home/tj/build/neovim"))