Streamed some refactoring. More work to do
This commit is contained in:
270
scratch/bk_tree.lua
Normal file
270
scratch/bk_tree.lua
Normal file
@@ -0,0 +1,270 @@
|
||||
|
||||
---------------------------
|
||||
-- bk-tree datastructure
|
||||
--
|
||||
-- http://en.wikipedia.org/wiki/BK-tree
|
||||
-- @module bk-tree
|
||||
-- @author Robin Hübner
|
||||
-- robinhubner@gmail.com
|
||||
-- @release version 1.0.2
|
||||
-- @license MIT
|
||||
|
||||
local bk_tree = {}
|
||||
|
||||
|
||||
local function lazy_copy(t1)
|
||||
|
||||
local cp = {}
|
||||
|
||||
for k, v in pairs(t1) do
|
||||
cp[k] = v
|
||||
end
|
||||
|
||||
return cp
|
||||
|
||||
end
|
||||
|
||||
local function min(a, b, c)
|
||||
|
||||
local min_val = a
|
||||
|
||||
if b < min_val then min_val = b end
|
||||
if c < min_val then min_val = c end
|
||||
|
||||
return min_val
|
||||
|
||||
end
|
||||
|
||||
----------------------------------
|
||||
--- Levenshtein distance function.
|
||||
-- @tparam string s1
|
||||
-- @tparam string s2
|
||||
-- @treturn number the levenshtein distance
|
||||
-- @within Metrics
|
||||
function bk_tree.levenshtein_dist(s1, s2)
|
||||
|
||||
if s1 == s2 then return 0 end
|
||||
if s1:len() == 0 then return s2:len() end
|
||||
if s2:len() == 0 then return s1:len() end
|
||||
if s1:len() < s2:len() then s1, s2 = s2, s1 end
|
||||
|
||||
t = {}
|
||||
for i=1, #s1+1 do
|
||||
t[i] = {i-1}
|
||||
end
|
||||
|
||||
for i=1, #s2+1 do
|
||||
t[1][i] = i-1
|
||||
end
|
||||
|
||||
local cost
|
||||
for i=2, #s1+1 do
|
||||
|
||||
for j=2, #s2+1 do
|
||||
cost = (s1:sub(i-1,i-1) == s2:sub(j-1,j-1) and 0) or 1
|
||||
t[i][j] = min(
|
||||
t[i-1][j] + 1,
|
||||
t[i][j-1] + 1,
|
||||
t[i-1][j-1] + cost)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
return t[#s1+1][#s2+1]
|
||||
|
||||
end
|
||||
|
||||
function bk_tree.hook(param)
|
||||
|
||||
local name, callee = debug.getlocal(2, 1)
|
||||
local f = debug.getinfo(2, "f").func
|
||||
local p = debug.getinfo(3, "f").func
|
||||
--[[ previous function in the callstack, if called from the same place,
|
||||
don't add to the insert/remove counters. ]]--
|
||||
|
||||
if f == bk_tree.insert and p ~= bk_tree.insert then
|
||||
callee.stats.nodes = callee.stats.nodes + 1
|
||||
elseif f == bk_tree.remove and p ~= bk_tree.remove then
|
||||
callee.stats.nodes = callee.stats.nodes - 1
|
||||
elseif f == bk_tree.query and p == bk_tree.query then
|
||||
callee.stats.queries = callee.stats.queries + 1
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
--- Hooks debugging into tree execution.
|
||||
-- Keeps track of number of nodes created, queries made,
|
||||
-- note that this must be run directly after tree is created
|
||||
-- in order to get correct information.
|
||||
-- @within Debug
|
||||
--- @usage
|
||||
-- local bktree = require "bk-tree"
|
||||
-- local tree = bktree:new("word")
|
||||
-- tree:debug()
|
||||
-- tree:insert("perceive")
|
||||
-- tree:insert("beautiful")
|
||||
-- tree:insert("definitely")
|
||||
-- local result = tree:query("definately", 3)
|
||||
-- tree:print_stats()
|
||||
--
|
||||
-- -- output
|
||||
-- Nodes: 4
|
||||
-- Queries: 3
|
||||
-- Nodes Queried: 75%
|
||||
function bk_tree:debug()
|
||||
|
||||
local nc = 0
|
||||
if self.root then nc = 1 end
|
||||
self.stats = { nodes = nc, queries = 0 }
|
||||
debug.sethook(self.hook, "c")
|
||||
|
||||
end
|
||||
|
||||
--- Print execution stats.
|
||||
-- Prints nodes queried and total nodes, as well as a fraction of
|
||||
-- nodes visited to satisfy the query, resets the counter of nodes queried when called.
|
||||
-- @within Debug
|
||||
-- @see debug
|
||||
function bk_tree:print_stats()
|
||||
|
||||
print("\nNodes: " .. self.stats.nodes)
|
||||
print("Queries: " .. self.stats.queries)
|
||||
print("Nodes Queried: " .. self.stats.queries/self.stats.nodes*100 .. "%\n")
|
||||
self.stats.queries = 0
|
||||
|
||||
end
|
||||
|
||||
--- Fetch execution stats.
|
||||
-- Returns a copy of the execution stats that @{print_stats} would print, requires debug to have been enabled
|
||||
-- to not just return defaults. Useful if you want to profile things.
|
||||
-- @within Debug
|
||||
-- @return {key = value,...}
|
||||
function bk_tree:get_stats()
|
||||
|
||||
return lazy_copy(self.stats)
|
||||
|
||||
end
|
||||
|
||||
---------------------------
|
||||
--- Creates a new bk-tree.
|
||||
-- @constructor
|
||||
-- @string[opt] root_word the root of the new tree
|
||||
-- @tparam[opt=levenshtein_dist] function dist_func the distance function used
|
||||
-- @see levenshtein_dist
|
||||
-- @return the new bk-tree instance
|
||||
--- @usage
|
||||
-- local bktree = require "bk-tree"
|
||||
-- local tree = bktree:new("word")
|
||||
function bk_tree:new(root_word, dist_func)
|
||||
|
||||
local n_obj = {}
|
||||
if root_word then n_obj.root = { str = root_word, children = {} } end
|
||||
n_obj.dist_func = dist_func or self.levenshtein_dist
|
||||
|
||||
setmetatable(n_obj, self)
|
||||
self.__index = self
|
||||
|
||||
return n_obj
|
||||
|
||||
end
|
||||
|
||||
--------------------------------
|
||||
--- Inserts word into the tree.
|
||||
-- @string word
|
||||
-- @treturn bool true if inserted, false if word already exists in tree
|
||||
--- @usage
|
||||
-- local bktree = require "bk-tree"
|
||||
-- local tree = bktree:new("root")
|
||||
-- local success = tree:insert("other_word")
|
||||
function bk_tree:insert(word, node)
|
||||
|
||||
node = node or self.root
|
||||
|
||||
if not node then
|
||||
self.root = { str = word, children = {} }
|
||||
return true
|
||||
end
|
||||
|
||||
local dist = self.dist_func(word, node.str)
|
||||
if dist == 0 then return false end
|
||||
|
||||
local some_node = node.children[dist]
|
||||
|
||||
if not some_node then
|
||||
node.children[dist] = { str = word, children = {} }
|
||||
return true
|
||||
end
|
||||
|
||||
return self:insert(word, some_node)
|
||||
|
||||
end
|
||||
|
||||
--------------------------------
|
||||
--- Query the tree for matches.
|
||||
-- @string word
|
||||
-- @tparam number n max edit distance to use when querying
|
||||
-- @treturn {{str=string,distance=number},....} table of tables with matching words, empty table if no matches
|
||||
--- @usage
|
||||
-- local bktree = require "bk-tree"
|
||||
-- local tree = bktree:new("word")
|
||||
-- tree:insert("hello")
|
||||
-- tree:insert("goodbye")
|
||||
-- tree:insert("woop")
|
||||
-- local result = tree:query("woop", 1)
|
||||
function bk_tree:query(word, n, node, matches)
|
||||
|
||||
node = node or self.root
|
||||
matches = matches or {}
|
||||
|
||||
if not node then return matches end
|
||||
|
||||
local dist = self.dist_func(word, node.str)
|
||||
if dist <= n then matches[#matches+1] = {str = node.str, distance = dist} end
|
||||
|
||||
for k, child in pairs(node.children) do
|
||||
if child ~= nil then
|
||||
if k >= dist-n and k <= dist+n then
|
||||
self:query(word, n, child, matches)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return matches
|
||||
|
||||
end
|
||||
|
||||
---------------------------------------------------------
|
||||
--- Queries the the tree for a match, sorts the results.
|
||||
-- Calls @{query} and returns the results sorted.
|
||||
-- @string word
|
||||
-- @tparam number n max edit distance to use when querying
|
||||
-- @treturn {{str=string,distance=number},....} table of tables with matching words sorted by distance, empty table if no matches
|
||||
--- @usage
|
||||
-- local bktree = require "bk-tree"
|
||||
-- local tree = bktree:new("word")
|
||||
-- tree:insert("woop")
|
||||
-- tree:insert("worp")
|
||||
-- tree:insert("warp")
|
||||
-- local result = tree:query_sorted("woop", 3)
|
||||
function bk_tree:query_sorted(word, n)
|
||||
|
||||
local result = self:query(word, n)
|
||||
|
||||
table.sort(result, function(a,b) return a.distance < b.distance end)
|
||||
|
||||
return result
|
||||
|
||||
end
|
||||
|
||||
|
||||
local tree = bk_tree:new("word")
|
||||
tree:insert("hello")
|
||||
tree:insert("welp")
|
||||
tree:insert("function")
|
||||
tree:insert("this long line what")
|
||||
tree:insert("what this long line what")
|
||||
print(vim.inspect(tree))
|
||||
print(vim.inspect(tree:query_sorted("what", 3)))
|
||||
|
||||
|
||||
return bk_tree
|
||||
@@ -1,3 +1,4 @@
|
||||
package.loaded['telescope.pickers'] = nil
|
||||
local telescope = require('telescope')
|
||||
|
||||
-- Goals:
|
||||
@@ -61,15 +62,15 @@ local file_sorter = telescope.sorters.new {
|
||||
if prompt == '' then return 0 end
|
||||
if not line then return -1 end
|
||||
|
||||
local dist = string_distance(prompt, line)
|
||||
-- if dist > (0.75 * #line) and #prompt > 3 then
|
||||
-- return -1
|
||||
-- end
|
||||
|
||||
return dist
|
||||
return tonumber(vim.fn.systemlist(string.format(
|
||||
"echo '%s' | ~/tmp/fuzzy_test/target/debug/fuzzy_test '%s'",
|
||||
line,
|
||||
prompt
|
||||
))[1])
|
||||
end
|
||||
}
|
||||
|
||||
|
||||
local file_previewer = telescope.previewers.vim_buffer
|
||||
|
||||
local file_picker = telescope.pickers.new {
|
||||
|
||||
8
scratch/fuzzy_tester.lua
Normal file
8
scratch/fuzzy_tester.lua
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
line = "hello"
|
||||
prompt = "h"
|
||||
print(vim.inspect(vim.fn.systemlist(string.format(
|
||||
"echo '%s' | ~/tmp/fuzzy_test/target/debug/fuzzy_test '%s'",
|
||||
line,
|
||||
prompt
|
||||
))))
|
||||
87
scratch/ngrams.lua
Normal file
87
scratch/ngrams.lua
Normal file
@@ -0,0 +1,87 @@
|
||||
|
||||
local function ngrams(counts, doc)
|
||||
local DEPTH = 5
|
||||
local docLen = #doc
|
||||
local min, concat = math.min, table.concat
|
||||
for i = 1, docLen - 1 do
|
||||
for j = i, min(i + DEPTH - 1, docLen) do
|
||||
if not doc[j] then break end
|
||||
local k = concat(doc, " ", i, j)
|
||||
counts[k] = (counts[k] or 0) + 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
local bz = io.popen('bzcat /home/tj/Downloads/pages.xml.bz2')
|
||||
local title, content = "", ""
|
||||
local inText = false
|
||||
|
||||
local numDocs = 0
|
||||
local globalCounts = {}
|
||||
|
||||
local function set(t)
|
||||
local s = {}
|
||||
for _, v in pairs(t) do s[v] = true end
|
||||
return s
|
||||
end
|
||||
|
||||
local bad = set({
|
||||
'after', 'also', 'article', 'date', 'defaultsort', 'external', 'first', 'from',
|
||||
'have', 'html', 'http', 'image', 'infobox', 'links', 'name', 'other', 'preserve',
|
||||
'references', 'reflist', 'space', 'that', 'this', 'title', 'which', 'with',
|
||||
'quot', 'ref', 'name', 'http', 'amp', 'ndash', 'www', 'cite', 'nbsp',
|
||||
'style', 'text', 'align', 'center', 'background'
|
||||
})
|
||||
|
||||
local function isnumber(w)
|
||||
s, e = w:find("[0-9]+")
|
||||
return s
|
||||
end
|
||||
|
||||
for line in bz:lines() do
|
||||
local _, _, mTitle = line:find("<title>(.*)</title>")
|
||||
local _, _, bText = line:find("<text[^>]*>([^<]*)")
|
||||
local eText, _ = line:find("</text>")
|
||||
|
||||
if mTitle then
|
||||
title = mTitle
|
||||
elseif bText then
|
||||
content = bText
|
||||
inText = true
|
||||
elseif inText then
|
||||
content = content .. line
|
||||
end
|
||||
|
||||
if eText then
|
||||
words = {}
|
||||
for v in content:gmatch("%w+") do
|
||||
v = v:lower()
|
||||
if #v >= 3 and #v < 12 and not bad[v] and not isnumber(v) then
|
||||
table.insert(words, v)
|
||||
else
|
||||
table.insert(words, nil)
|
||||
end
|
||||
end
|
||||
|
||||
ngrams(globalCounts, words)
|
||||
inText = false
|
||||
|
||||
numDocs = numDocs + 1
|
||||
if numDocs % 10 == 0 then
|
||||
io.write(string.format("Working... %d documents processed.\r", numDocs))
|
||||
io.flush()
|
||||
end
|
||||
|
||||
if numDocs == 500 then
|
||||
local f = io.open('/tmp/freqs.lua.txt', 'w')
|
||||
for k, v in pairs(globalCounts) do
|
||||
f:write(k, '\t', v, '\n')
|
||||
end
|
||||
f:close()
|
||||
|
||||
globalCounts = {}
|
||||
os.exit(0)
|
||||
end
|
||||
end
|
||||
end
|
||||
4
scratch/picker_locations.lua
Normal file
4
scratch/picker_locations.lua
Normal file
@@ -0,0 +1,4 @@
|
||||
package.loaded['telescope'] = nil
|
||||
package.loaded['telescope.init'] = nil
|
||||
package.loaded['telescope.picker'] = nil
|
||||
package.loaded['telescope.finder'] = nil
|
||||
@@ -1,10 +1,10 @@
|
||||
|
||||
echo "hello"
|
||||
sleep 1
|
||||
echo "cool"
|
||||
echo "help"
|
||||
sleep 1
|
||||
echo "world"
|
||||
echo "hi"
|
||||
sleep 1
|
||||
echo "x"
|
||||
echo "husband"
|
||||
sleep 1
|
||||
echo "y"
|
||||
echo "helper"
|
||||
|
||||
Reference in New Issue
Block a user