我怎么也无法反序列化来自 OpenStreetMap 的 protobuf 文件。我试图反序列化以下数据提取文件:http://download.geofabrik.de/osm/north-america/us-northeast.osm.pbf 以获取节点(Node),并使用 http://code.google.com/p/protobuf-net/ 作为库……
我怎么也无法反序列化来自 OpenStreetMap 的 protobuf 文件。




对;问题在于这并不只是 protobuf——它是一种混合文件格式(定义见此处),其内部混合了包括 protobuf 在内的多种格式。它还用到了压缩(不过压缩看起来是可选的)。

我已尽我所能从规范中梳理出了文件格式,下面给出一个 C# 读取器,它使用 protobuf-net 来处理各个块——它能顺利地把该文件一直读到末尾——我可以告诉你其中共有 4515 个块(BlockHeader)。到了 Blob 这一步,我对规范如何区分 OSMHeader 和 OSMData 有些困惑——欢迎在这方面提出建议!我还使用了 ZLIB.NET 来处理其中用到的 zlib 压缩。在还没弄清这个问题的情况下,我暂且只是解压 ZLIB 数据,并按其声明的大小进行校验,以确认它至少是合理的。

如果你能弄清楚(或者去询问作者)他们是如何区分 OSMHeader 和 OSMData 的,我很乐意把剩下的部分补上。希望你不介意我先停在这里——毕竟已经花了好几个小时了 ;p

using System;
using System.IO;
using OpenStreetMap; // where my .proto-generated entities are living
using ProtoBuf; // protobuf-net
using zlib; // ZLIB.NET    

/// <summary>
/// Walks an OpenStreetMap .osm.pbf file block by block, decompressing each
/// zlib-compressed blob and checking that its inflated size matches the size
/// declared in the blob. It does not yet decode the payload itself.
/// </summary>
class OpenStreetMapParser
{
    /// <summary>
    /// Entry point: reads "us-northeast.osm.pbf" from the working directory and
    /// prints one line per block that was read and validated.
    /// </summary>
    /// <exception cref="NotSupportedException">A blob carries no zlib data.</exception>
    /// <exception cref="FormatException">The inflated size differs from the blob's declared raw size.</exception>
    static void Main()
    {
        using (var file = File.OpenRead("us-northeast.osm.pbf"))
        {
            // from http://wiki.openstreetmap.org/wiki/ProtocolBufBinary:
            //A file contains a header followed by a sequence of fileblocks. The design is intended to allow future random-access to the contents of the file and skipping past not-understood or unwanted data.
            //The format is a repeating sequence of:
            //int4: length of the BlockHeader message in network byte order
            //serialized BlockHeader message
            //serialized Blob message (size is given in the header)

            int length, blockCount = 0;
            // scratch buffer reused for draining every decompressed payload
            byte[] scratch = new byte[4096];
            while (Serializer.TryReadLengthPrefix(file, PrefixStyle.Fixed32, out length))
            {
                // I'm just being lazy and re-using something "close enough" here
                // note that v2 has a big-endian option, but Fixed32 assumes little-endian - we
                // actually need the other way around (network byte order):
                uint len = (uint)length;
                len = ((len & 0xFF) << 24) | ((len & 0xFF00) << 8) | ((len & 0xFF0000) >> 8) | ((len & 0xFF000000) >> 24);
                length = (int)len;

                BlockHeader header;
                // again, v2 has capped-streams built in, but I'm deliberately
                // limiting myself to v1 features
                using (var tmp = new LimitedStream(file, length))
                {
                    header = Serializer.Deserialize<BlockHeader>(tmp);
                }

                Blob blob;
                using (var tmp = new LimitedStream(file, header.datasize))
                {
                    blob = Serializer.Deserialize<Blob>(tmp);
                }
                if (blob.zlib_data == null) throw new NotSupportedException("I'm only handling zlib here!");

                using (var ms = new MemoryStream(blob.zlib_data))
                using (var zlib = new ZLibStream(ms))
                { // at this point I'm very unclear how the OSMHeader and OSMData are packed - it isn't clear
                    // read this to the end, to check we can parse the zlib;
                    // buffered reads avoid one virtual call (and allocation) per byte
                    int payloadLen = 0;
                    int chunk;
                    while ((chunk = zlib.Read(scratch, 0, scratch.Length)) > 0)
                    {
                        payloadLen += chunk;
                    }
                    if (payloadLen != blob.raw_size) throw new FormatException("Screwed that up...");
                }

                Console.WriteLine("Read block " + blockCount.ToString());
                // advance the counter so each block reports its own (zero-based) index
                blockCount++;
            }

            Console.WriteLine("all done");
        }
    }
}
/// <summary>
/// Base class for read-only, non-seekable streams. Subclasses supply data via
/// <see cref="ReadNextBlock"/>; this class loops until the caller's request is
/// satisfied or the source is exhausted, and tracks the number of bytes read.
/// </summary>
abstract class InputStream : Stream
{
    // total number of bytes handed out through Read so far
    private long pos;

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes into <paramref name="buffer"/>
    /// starting at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The number of bytes read; zero or a negative value signals end of data.</returns>
    protected abstract int ReadNextBlock(byte[] buffer, int offset, int count);

    /// <summary>
    /// Fills as much of the requested range as possible by calling
    /// <see cref="ReadNextBlock"/> repeatedly until <paramref name="count"/> bytes
    /// have been read or a call yields no more data.
    /// </summary>
    /// <returns>The total number of bytes read, which may be less than <paramref name="count"/>.</returns>
    public sealed override int Read(byte[] buffer, int offset, int count)
    {
        int bytesRead, totalRead = 0;
        while (count > 0 && (bytesRead = ReadNextBlock(buffer, offset, count)) > 0)
        {
            count -= bytesRead;
            offset += bytesRead;
            totalRead += bytesRead;
            pos += bytesRead;
        }
        return totalRead;
    }

    /// <summary>Not implemented: this stream is read-only.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented: this stream has no settable length.</summary>
    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Gets the number of bytes read so far. Setting it is only accepted when the
    /// value equals the current position (a no-op); anything else throws.
    /// </summary>
    public override long Position
    {
        get
        {
            return pos;
        }
        set
        {
            if (pos != value) throw new NotImplementedException();
        }
    }

    /// <summary>Not implemented: the total length is not known.</summary>
    public override long Length
    {
        get { throw new NotImplementedException(); }
    }

    /// <summary>Not implemented: there is nothing to flush on a read-only stream.</summary>
    public override void Flush()
    {
        throw new NotImplementedException();
    }

    /// <summary>Always <c>false</c>.</summary>
    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Always <c>true</c>.</summary>
    public override bool CanRead
    {
        get { return true; }
    }

    /// <summary>Always <c>false</c>.</summary>
    public override bool CanSeek
    {
        get { return false; }
    }

    /// <summary>Not implemented: this stream is forward-only.</summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotImplementedException();
    }
}
/// <summary>
/// Exposes zlib-decompressed data from an underlying stream as a read-only
/// <see cref="Stream"/>, by wrapping ZLIB.NET's <c>ZInputStream</c>.
/// </summary>
class ZLibStream : InputStream
{   // uses ZLIB.NET: http://www.componentace.com/download/download.php?editionid=25
    private ZInputStream reader; // seriously, why isn't this a stream?

    /// <summary>Creates a decompressing view over <paramref name="stream"/>.</summary>
    public ZLibStream(Stream stream)
    {
        reader = new ZInputStream(stream);
    }

    /// <summary>
    /// Closes the wrapped zlib reader and then this stream.
    /// </summary>
    public override void Close()
    {
        // NOTE(review): closing the reader presumably also closes the stream it wraps - confirm against ZLIB.NET
        reader.Close();
        base.Close();
    }

    /// <summary>
    /// Reads the next run of decompressed bytes from the zlib reader.
    /// </summary>
    /// <returns>The value returned by the reader's <c>read</c> call for this request.</returns>
    protected override int ReadNextBlock(byte[] buffer, int offset, int count)
    {
        // OMG! reader.Read is the base-stream, reader.read is decompressed! yeuch
        return reader.read(buffer, offset, count);
    }
}

/// <summary>
/// Read-only view over another stream that yields at most a fixed number of
/// bytes, so a length-delimited message can be deserialized without reading
/// past its end.
/// </summary>
// deliberately doesn't dispose the base-stream
class LimitedStream : InputStream
{
    private readonly Stream stream;
    // number of bytes this view may still hand out
    private long remaining;

    /// <summary>
    /// Creates a view over <paramref name="stream"/> capped at <paramref name="length"/> bytes.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="stream"/> is not readable.</exception>
    public LimitedStream(Stream stream, long length)
    {
        if (length < 0) throw new ArgumentOutOfRangeException("length");
        if (stream == null) throw new ArgumentNullException("stream");
        if (!stream.CanRead) throw new ArgumentException("The stream must be readable.", "stream");
        this.stream = stream;
        this.remaining = length;
    }

    /// <summary>
    /// Reads from the underlying stream, never requesting more than the bytes
    /// still allowed by the cap.
    /// </summary>
    /// <returns>The number of bytes read; zero once the cap or the underlying stream is exhausted.</returns>
    protected override int ReadNextBlock(byte[] buffer, int offset, int count)
    {
        // budget spent: report end-of-data without touching the underlying stream
        if (remaining <= 0) return 0;
        if (count > remaining) count = (int)remaining;
        int bytesRead = stream.Read(buffer, offset, count);
        if (bytesRead > 0) remaining -= bytesRead;
        return bytesRead;
    }
}