Streamed some refactoring. More work to do

This commit is contained in:
TJ DeVries
2020-07-31 00:05:22 -04:00
parent ababfbfd88
commit fa0382d93e
12 changed files with 555 additions and 112 deletions

270
scratch/bk_tree.lua Normal file
View File

@@ -0,0 +1,270 @@
---------------------------
-- bk-tree datastructure
--
-- http://en.wikipedia.org/wiki/BK-tree
-- @module bk-tree
-- @author Robin Hübner
-- robinhubner@gmail.com
-- @release version 1.0.2
-- @license MIT
local bk_tree = {}
local function lazy_copy(t1)
local cp = {}
for k, v in pairs(t1) do
cp[k] = v
end
return cp
end
local function min(a, b, c)
local min_val = a
if b < min_val then min_val = b end
if c < min_val then min_val = c end
return min_val
end
----------------------------------
--- Levenshtein distance function.
-- @tparam string s1
-- @tparam string s2
-- @treturn number the levenshtein distance
-- @within Metrics
function bk_tree.levenshtein_dist(s1, s2)
if s1 == s2 then return 0 end
if s1:len() == 0 then return s2:len() end
if s2:len() == 0 then return s1:len() end
if s1:len() < s2:len() then s1, s2 = s2, s1 end
t = {}
for i=1, #s1+1 do
t[i] = {i-1}
end
for i=1, #s2+1 do
t[1][i] = i-1
end
local cost
for i=2, #s1+1 do
for j=2, #s2+1 do
cost = (s1:sub(i-1,i-1) == s2:sub(j-1,j-1) and 0) or 1
t[i][j] = min(
t[i-1][j] + 1,
t[i][j-1] + 1,
t[i-1][j-1] + cost)
end
end
return t[#s1+1][#s2+1]
end
function bk_tree.hook(param)
local name, callee = debug.getlocal(2, 1)
local f = debug.getinfo(2, "f").func
local p = debug.getinfo(3, "f").func
--[[ previous function in the callstack, if called from the same place,
don't add to the insert/remove counters. ]]--
if f == bk_tree.insert and p ~= bk_tree.insert then
callee.stats.nodes = callee.stats.nodes + 1
elseif f == bk_tree.remove and p ~= bk_tree.remove then
callee.stats.nodes = callee.stats.nodes - 1
elseif f == bk_tree.query and p == bk_tree.query then
callee.stats.queries = callee.stats.queries + 1
end
end
--- Hooks debugging into tree execution.
-- Keeps track of number of nodes created, queries made,
-- note that this must be run directly after tree is created
-- in order to get correct information.
-- @within Debug
--- @usage
-- local bktree = require "bk-tree"
-- local tree = bktree:new("word")
-- tree:debug()
-- tree:insert("perceive")
-- tree:insert("beautiful")
-- tree:insert("definitely")
-- local result = tree:query("definately", 3)
-- tree:print_stats()
--
-- -- output
-- Nodes: 4
-- Queries: 3
-- Nodes Queried: 75%
function bk_tree:debug()
local nc = 0
if self.root then nc = 1 end
self.stats = { nodes = nc, queries = 0 }
debug.sethook(self.hook, "c")
end
--- Print execution stats.
-- Prints nodes queried and total nodes, as well as a fraction of
-- nodes visited to satisfy the query, resets the counter of nodes queried when called.
-- @within Debug
-- @see debug
function bk_tree:print_stats()
print("\nNodes: " .. self.stats.nodes)
print("Queries: " .. self.stats.queries)
print("Nodes Queried: " .. self.stats.queries/self.stats.nodes*100 .. "%\n")
self.stats.queries = 0
end
--- Fetch execution stats.
-- Returns a copy of the execution stats that @{print_stats} would print, requires debug to have been enabled
-- to not just return defaults. Useful if you want to profile things.
-- @within Debug
-- @return {key = value,...}
function bk_tree:get_stats()
return lazy_copy(self.stats)
end
---------------------------
--- Creates a new bk-tree.
-- @constructor
-- @string[opt] root_word the root of the new tree
-- @tparam[opt=levenshtein_dist] function dist_func the distance function used
-- @see levenshtein_dist
-- @return the new bk-tree instance
--- @usage
-- local bktree = require "bk-tree"
-- local tree = bktree:new("word")
function bk_tree:new(root_word, dist_func)
local n_obj = {}
if root_word then n_obj.root = { str = root_word, children = {} } end
n_obj.dist_func = dist_func or self.levenshtein_dist
setmetatable(n_obj, self)
self.__index = self
return n_obj
end
--------------------------------
--- Inserts word into the tree.
-- @string word
-- @treturn bool true if inserted, false if word already exists in tree
--- @usage
-- local bktree = require "bk-tree"
-- local tree = bktree:new("root")
-- local success = tree:insert("other_word")
function bk_tree:insert(word, node)
node = node or self.root
if not node then
self.root = { str = word, children = {} }
return true
end
local dist = self.dist_func(word, node.str)
if dist == 0 then return false end
local some_node = node.children[dist]
if not some_node then
node.children[dist] = { str = word, children = {} }
return true
end
return self:insert(word, some_node)
end
--------------------------------
--- Query the tree for matches.
-- @string word
-- @tparam number n max edit distance to use when querying
-- @treturn {{str=string,distance=number},....} table of tables with matching words, empty table if no matches
--- @usage
-- local bktree = require "bk-tree"
-- local tree = bktree:new("word")
-- tree:insert("hello")
-- tree:insert("goodbye")
-- tree:insert("woop")
-- local result = tree:query("woop", 1)
function bk_tree:query(word, n, node, matches)
node = node or self.root
matches = matches or {}
if not node then return matches end
local dist = self.dist_func(word, node.str)
if dist <= n then matches[#matches+1] = {str = node.str, distance = dist} end
for k, child in pairs(node.children) do
if child ~= nil then
if k >= dist-n and k <= dist+n then
self:query(word, n, child, matches)
end
end
end
return matches
end
---------------------------------------------------------
--- Queries the the tree for a match, sorts the results.
-- Calls @{query} and returns the results sorted.
-- @string word
-- @tparam number n max edit distance to use when querying
-- @treturn {{str=string,distance=number},....} table of tables with matching words sorted by distance, empty table if no matches
--- @usage
-- local bktree = require "bk-tree"
-- local tree = bktree:new("word")
-- tree:insert("woop")
-- tree:insert("worp")
-- tree:insert("warp")
-- local result = tree:query_sorted("woop", 3)
function bk_tree:query_sorted(word, n)
local result = self:query(word, n)
table.sort(result, function(a,b) return a.distance < b.distance end)
return result
end
local tree = bk_tree:new("word")
tree:insert("hello")
tree:insert("welp")
tree:insert("function")
tree:insert("this long line what")
tree:insert("what this long line what")
print(vim.inspect(tree))
print(vim.inspect(tree:query_sorted("what", 3)))
return bk_tree