所以,我刚从OCaml移植了Trie.不幸的是,就tryFind而言,它比标准Map运行得慢.我不明白这一点 – 特里似乎应该更快. F#的代码库是以某种特殊方式构建的,以使它们比用户通常部署的代码更快吗
这是代码 –
[<RequireQualifiedAccess>] module Trie type Node<'k, 'v when 'k : comparison> = { TrieMap : Map<'k, Node<'k, 'v>> TrieKvp : ('k list * 'v) option } member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty let inline make map kvp = { TrieMap = map TrieKvp = kvp } let inline makeEmpty () : Node<'k, 'v> = make Map.empty None let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty let rec tryFind (key : 'k list) node = if key.IsEmpty then match node.TrieKvp with | Some (_, value) -> Some value | None -> None else let keyHead = key.Head let keyTail = key.Tail let optSubNode = Map.tryFind keyHead node.TrieMap match optSubNode with | Some subNode -> tryFind keyTail subNode | None -> None let inline containsKey key node = (tryFind key node).IsSome let rec addInternal (key : 'k list) value node = if key.IsEmpty then make node.TrieMap (Some (key, value)) else let keyHead = key.Head let keyTail = key.Tail let newTrie = match Map.tryFind keyHead node.TrieMap with | Some subTrie -> subTrie | None -> makeEmpty () let newTrie2 = addInternal keyTail value newTrie make (Map.add keyHead newTrie2 node.TrieMap) node.TrieKvp let inline add key value node = addInternal key value node let rec addMany kvps node = if Seq.isEmpty kvps then node else let kvpHead = Seq.head kvps let kvpTail = Seq.skip 1 kvps let newTrie = add (fst kvpHead) (snd kvpHead) node addMany kvpTail newTrie let inline ofList kvps = addMany kvps (makeEmpty ()) let inline ofListBy by kvps = let pairs = List.map by kvps ofList pairs let rec foldInternal folder rev node state = match node.TrieKvp with | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap let inline fold folder state node = foldInternal folder [] node state let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> = match node.TrieKvp with | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value)) | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None let inline toValueList node = fold (fun state _ value -> value :: state) [] node let inline singleton (key, value) = add key value (makeEmpty ())
这是Jon Harrop提供的性能测试,我发现它足以测量改进 –
let xs = Array.init 1000000 (fun i -> [i]) let timer = System.Diagnostics.Stopwatch.StartNew() let mutable t = Trie.makeEmpty() for i=0 to xs.Length-1 do t <- Trie.add xs.[i] xs.[i] t printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds timer.Restart() for _ in 1..100 do for i=0 to xs.Length-1 do ignore(Trie.tryFind xs.[i]) printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds let timer = System.Diagnostics.Stopwatch.StartNew() let mutable t = Map.empty for i=0 to xs.Length-1 do t <- Map.add xs.[i] xs.[i] t printfn "Map took %fs to build" timer.Elapsed.TotalSeconds timer.Restart() for _ in 1..100 do for i=0 to xs.Length-1 do ignore(Map.tryFind xs.[i]) printfn "Map took %fs to search" timer.Elapsed.TotalSeconds
注意:如果您有更快的查找数据结构,请注意我需要一个持久的数据结构.
Unfortunately, it runs slower than the standard Map in terms of tryFind. I don’t understand this – the trie seems like it should be faster.
这里的快速基准测试表明,至少在简单的情况下,你的trie已经比Map快了:
do let n = 0 let xs = Array.init 1000000 (fun i -> [i]) let timer = System.Diagnostics.Stopwatch.StartNew() let mutable t = Trie.makeEmpty() for i=0 to xs.Length-1 do t <- Trie.add xs.[i] xs.[i] t printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds timer.Restart() for _ in 1..100 do for i=0 to xs.Length-1 do ignore(Trie.tryFind xs.[i]) printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds let timer = System.Diagnostics.Stopwatch.StartNew() let mutable t = Map.empty for i=0 to xs.Length-1 do t <- Map.add xs.[i] xs.[i] t printfn "Map took %fs to build" timer.Elapsed.TotalSeconds timer.Restart() for _ in 1..100 do for i=0 to xs.Length-1 do ignore(Map.tryFind xs.[i]) printfn "Map took %fs to search" timer.Elapsed.TotalSeconds
我得到4s来构建你的Trie,8.7s来构建一个Map,在这两种情况下都有大约0.7来搜索.
但是,您的实施还有很大的改进空间.我最近写了一篇关于F#中优化的通用持久哈希trie实现的文章,该文章发布于here.
您之后的评论意味着您只想使用它来映射字符串.如果是这样的话,将字符串键专门化为trie会更有效率.
编辑
KVB建议我详细说明“改进的空间”,所以这里有一些反馈:
>谨慎使用内联作为优化,并且仅基于引人注目的性能测量.>使空值而不是函数.>尽可能避免使用List.head和List.tail.请改用模式匹配.>尽可能避免通用性(例如,在这种情况下为字符串键的硬编码).