当前位置 : 主页 > 网络安全 > 测试自动化 >

性能 – 为什么我的Trie查找速度慢于标准F#Map的速度?

来源:互联网 收集:自由互联 发布时间:2021-06-22
所以,我刚从OCaml移植了Trie.不幸的是,就tryFind而言,它比标准Map运行得慢.我不明白这一点 – 特里似乎应该更快. F#的代码库是以某种特殊方式构建的,以使它们比用户通常部署的代码更快吗
所以,我刚从OCaml移植了Trie.不幸的是,就tryFind而言,它比标准Map运行得慢.我不明白这一点 – 特里似乎应该更快. F#的代码库是以某种特殊方式构建的,以使它们比用户通常部署的代码更快吗?

这是代码 –

[<RequireQualifiedAccess>]
module Trie

type Node<'k, 'v when 'k : comparison> =
    { TrieMap : Map<'k, Node<'k, 'v>>
      TrieKvp : ('k list * 'v) option }
    member inline x.IsEmpty = x.TrieKvp.IsNone && x.TrieMap.IsEmpty

let inline make map kvp =
    { TrieMap = map
      TrieKvp = kvp }

let inline makeEmpty () : Node<'k, 'v> = make Map.empty None

let inline isEmpty (node : Node<'k, 'v>) = node.IsEmpty

let rec tryFind (key : 'k list) node =
    if key.IsEmpty then
        match node.TrieKvp with
        | Some (_, value) -> Some value
        | None -> None
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let optSubNode = Map.tryFind keyHead node.TrieMap
        match optSubNode with
        | Some subNode -> tryFind keyTail subNode
        | None -> None

let inline containsKey key node =
    (tryFind key node).IsSome

let rec addInternal (key : 'k list) value node =
    if key.IsEmpty then make node.TrieMap (Some (key, value))
    else
        let keyHead = key.Head
        let keyTail = key.Tail
        let newTrie =
            match Map.tryFind keyHead node.TrieMap with
            | Some subTrie -> subTrie
            | None -> makeEmpty ()
        let newTrie2 = addInternal keyTail value newTrie
        make (Map.add keyHead newTrie2 node.TrieMap) node.TrieKvp

let inline add key value node =
    addInternal key value node

let rec addMany kvps node =
    if Seq.isEmpty kvps then node
    else
        let kvpHead = Seq.head kvps
        let kvpTail = Seq.skip 1 kvps
        let newTrie = add (fst kvpHead) (snd kvpHead) node
        addMany kvpTail newTrie

let inline ofList kvps =
    addMany kvps (makeEmpty ())

let inline ofListBy by kvps =
    let pairs = List.map by kvps
    ofList pairs

let rec foldInternal folder rev node state =
    match node.TrieKvp with
    | Some (_, value) -> folder (Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap) (List.rev rev) value
    | None -> Map.fold (fun state key value -> foldInternal folder (key :: rev) value state) state node.TrieMap

let inline fold folder state node =
    foldInternal folder [] node state

let rec map (mapper : 'k list -> 'v -> 'a) (node : Node<'k, 'v>) : Node<'k, 'a> =
    match node.TrieKvp with
    | Some (key, value) -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) (Some (key, mapper key value))
    | None -> make (Map.map (fun _ value -> map mapper value) node.TrieMap) None

let inline toValueList node =
    fold (fun state _ value -> value :: state) [] node

let inline singleton (key, value) =
    add key value (makeEmpty ())

这是Jon Harrop提供的性能测试,我发现它足以测量改进 –

let xs = Array.init 1000000 (fun i -> [i])
let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Trie.makeEmpty()
for i=0 to xs.Length-1 do
    t <- Trie.add xs.[i] xs.[i] t
printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Trie.tryFind xs.[i])
printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

let timer = System.Diagnostics.Stopwatch.StartNew()
let mutable t = Map.empty
for i=0 to xs.Length-1 do
    t <- Map.add xs.[i] xs.[i] t
printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
timer.Restart()
for _ in 1..100 do
    for i=0 to xs.Length-1 do
        ignore(Map.tryFind xs.[i])
printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

注意:如果您有更快的查找数据结构,请注意我需要一个持久的数据结构.

Unfortunately, it runs slower than the standard Map in terms of tryFind. I don’t understand this – the trie seems like it should be faster.

这里的快速基准测试表明,至少在简单的情况下,你的trie已经比Map快了:

do
    let n = 0
    let xs = Array.init 1000000 (fun i -> [i])
    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Trie.makeEmpty()
    for i=0 to xs.Length-1 do
        t <- Trie.add xs.[i] xs.[i] t
    printfn "Trie took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Trie.tryFind xs.[i])
    printfn "Trie took %fs to search" timer.Elapsed.TotalSeconds

    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable t = Map.empty
    for i=0 to xs.Length-1 do
        t <- Map.add xs.[i] xs.[i] t
    printfn "Map took %fs to build" timer.Elapsed.TotalSeconds
    timer.Restart()
    for _ in 1..100 do
        for i=0 to xs.Length-1 do
            ignore(Map.tryFind xs.[i])
    printfn "Map took %fs to search" timer.Elapsed.TotalSeconds

我得到4s来构建你的Trie,8.7s来构建一个Map,在这两种情况下都有大约0.7来搜索.

但是,您的实施还有很大的改进空间.我最近写了一篇关于F#中优化的通用持久哈希trie实现的文章,该文章发布于here.

您之后的评论意味着您只想使用它来映射字符串.如果是这样的话,将字符串键专门化为trie会更有效率.

编辑

KVB建议我详细说明“改进的空间”,所以这里有一些反馈:

>谨慎使用内联作为优化,并且仅基于引人注目的性能测量.>使空值而不是函数.>尽可能避免使用List.head和List.tail.请改用模式匹配.>尽可能避免通用性(例如,在这种情况下为字符串键的硬编码).

网友评论