所以,我刚从OCaml移植了Trie.不幸的是,就tryFind而言,它比标准Map运行得慢.我不明白这一点 – 特里似乎应该更快. F#的代码库是以某种特殊方式构建的,以使它们比用户通常部署的代码更快吗
这是代码 –
[<RequireQualifiedAccess>]
module Trie
type Node<'k, 'v when 'k : comparison> =
{ TrieMap : Map<'k, Node<'k, 'v>>
TrieKvp : ('k list * 'v) option }
member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty
let inline make map kvp =
{ TrieMap = map
TrieKvp = kvp }
let inline makeEmpty () : Node<'k, 'v> = make Map.empty None
let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty
let rec tryFind (key : 'k list) node =
if key.IsEmpty then
match node.TrieKvp with
| Some (_, value) -> Some value
| None -> None
else
let keyHead = key.Head
let keyTail = key.Tail
let optSubNode = Map.tryFind keyHead node.TrieMap
match optSubNode with
| Some subNode -> tryFind keyTail subNode
| None -> None
let inline containsKey key node =
(tryFind key node).IsSome
let rec addInternal (key : 'k list) value node =
if key.IsEmpty then make node.TrieMap (Some (key, value))
else
let keyHead = key.Head
let keyTail = key.Tail
let newTrie =
match Map.tryFind keyHead node.TrieMap with
| Some subTrie -> subTrie
| None -> makeEmpty ()
let newTrie2 = addInternal keyTail value newTrie
make (Map.add keyHead newTrie2 node.TrieMap) node.TrieKvp
let inline add key value node =
addInternal key value node
let rec addMany kvps node =
if Seq.isEmpty kvps then node
else
let kvpHead = Seq.head kvps
let kvpTail = Seq.skip 1 kvps
let newTrie = add (fst kvpHead) (snd kvpHead) node
addMany kvpTail newTrie
let inline ofList kvps =
addMany kvps (makeEmpty ())
let inline ofListBy by kvps =
let pairs = List.map by kvps
ofList pairs
let rec foldInternal folder rev node state =
match node.TrieKvp with
| Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
| None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap
let inline fold folder state node =
foldInternal folder [] node state
let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
match node.TrieKvp with
| Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
| None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None
let inline toValueList node =
fold (fun state _ value -> value :: state) [] node
let inline singleton (key, value) =
add key value (makeEmpty ())
这是Jon Harrop提供的性能测试,我发现它足以测量改进 –
let xs = Array.init 1000000 (fun i -> [i])
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Trie.makeEmpty()
for i=0 to xs.Length-1 do
t <- Trie.add xs.[i] xs.[i] t
printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
for i=0 to xs.Length-1 do
ignore(Trie.tryFind xs.[i])
printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Map.empty
for i=0 to xs.Length-1 do
t <- Map.add xs.[i] xs.[i] t
printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
for i=0 to xs.Length-1 do
ignore(Map.tryFind xs.[i])
printfn "Map took %fs to search" timer.Elapsed.TotalSeconds
注意:如果您有更快的查找数据结构,请注意我需要一个持久的数据结构.
Unfortunately, it runs slower than the standard Map in terms of tryFind. I don’t understand this – the trie seems like it should be faster.
这里的快速基准测试表明,至少在简单的情况下,你的trie已经比Map快了:
do
let n = 0
let xs = Array.init 1000000 (fun i -> [i])
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Trie.makeEmpty()
for i=0 to xs.Length-1 do
t <- Trie.add xs.[i] xs.[i] t
printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
for i=0 to xs.Length-1 do
ignore(Trie.tryFind xs.[i])
printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Map.empty
for i=0 to xs.Length-1 do
t <- Map.add xs.[i] xs.[i] t
printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
for i=0 to xs.Length-1 do
ignore(Map.tryFind xs.[i])
printfn "Map took %fs to search" timer.Elapsed.TotalSeconds
我得到4s来构建你的Trie,8.7s来构建一个Map,在这两种情况下都有大约0.7来搜索.
但是,您的实施还有很大的改进空间.我最近写了一篇关于F#中优化的通用持久哈希trie实现的文章,该文章发布于here.
您之后的评论意味着您只想使用它来映射字符串.如果是这样的话,将字符串键专门化为trie会更有效率.
编辑
KVB建议我详细说明“改进的空间”,所以这里有一些反馈:
>谨慎使用内联作为优化,并且仅基于引人注目的性能测量.>使空值而不是函数.>尽可能避免使用List.head和List.tail.请改用模式匹配.>尽可能避免通用性(例如,在这种情况下为字符串键的硬编码).
