我无论如何都无法反序列化来自 OpenStreetMap 的 protobuf 文件。我试图反序列化以下数据提取文件:http://download.geofabrik.de/osm/north-america/us-northeast.osm.pbf 以获取节点,我使用 http://code.google.com/p/protobuf-net/ 作为库。
我试图反序列化以下数据提取文件:http://download.geofabrik.de/osm/north-america/us-northeast.osm.pbf 以获取节点,我使用 http://code.google.com/p/protobuf-net/ 作为库。我尝试过反序列化成一堆不同的对象,但结果全都是 null。
.proto 文件可以在这里找到:http://trac.openstreetmap.org/browser/applications/utils/export/osm2pgsql/protobuf
有什么建议?
对;问题在于这不仅仅是 protobuf —— 它是一种混合文件格式(定义见此处),只是在内部的多种格式之中包含了 protobuf。它还使用了压缩(虽然看起来是可选的)。我已经尽我所能从规范中梳理出了格式细节,并在下面给出一个 C# 读取器,它使用 protobuf-net 来处理各个数据块 —— 它可以顺利地把该文件一直读到末尾 —— 我可以告诉你其中共有 4515 个块(BlockHeader)。读到 Blob 时,我对规范如何区分 OSMHeader 和 OSMData 有点困惑 —— 欢迎在这一点上给我建议!我还使用了 ZLIB.NET 来处理文件所用的 zlib 压缩。在还没弄明白这个问题的情况下,我决定先解压 ZLIB 数据,并对照其声明的大小进行校验,以确认数据至少是合理的。
如果你能弄清楚(或者询问作者)他们是如何区分 OSMHeader 和 OSMData 的,我很乐意再补充相应的代码。希望你不介意我先做到这里 —— 毕竟已经花了好几个小时了 ;p
using System;
using System.IO;
using OpenStreetMap; // where my .proto-generated entities are living
using ProtoBuf; // protobuf-net
using zlib; // ZLIB.NET

/// <summary>
/// Walks an OpenStreetMap .osm.pbf file block by block: reads each length-prefixed
/// BlockHeader, then the Blob whose size the header announces, and verifies that the
/// zlib payload inflates to the size the Blob claims. It does not decode the payload.
/// </summary>
class OpenStreetMapParser
{
    /// <summary>
    /// Entry point. Opens "us-northeast.osm.pbf" from the working directory, reports
    /// each block as it is validated, then waits for Enter before exiting.
    /// </summary>
    /// <exception cref="NotSupportedException">A Blob carries no zlib_data.</exception>
    /// <exception cref="FormatException">The inflated byte count differs from raw_size.</exception>
    static void Main()
    {
        using (var file = File.OpenRead("us-northeast.osm.pbf"))
        {
            // from http://wiki.openstreetmap.org/wiki/ProtocolBufBinary:
            // A file contains a header followed by a sequence of fileblocks. The design is
            // intended to allow future random-access to the contents of the file and
            // skipping past not-understood or unwanted data.
            // The format is a repeating sequence of:
            //   int4: length of the BlockHeader message in network byte order
            //   serialized BlockHeader message
            //   serialized Blob message (size is given in the header)

            // Reused for every block so draining the zlib stream does not allocate per byte.
            byte[] scratch = new byte[8192];

            int length, blockCount = 0;
            while (Serializer.TryReadLengthPrefix(file, PrefixStyle.Fixed32, out length))
            {
                // I'm just being lazy and re-using something "close enough" here
                // note that v2 has a big-endian option, but Fixed32 assumes little-endian - we
                // actually need the other way around (network byte order):
                length = (int)SwapByteOrder((uint)length);

                BlockHeader header;
                // again, v2 has capped-streams built in, but I'm deliberately
                // limiting myself to v1 features
                using (var tmp = new LimitedStream(file, length))
                {
                    header = Serializer.Deserialize<BlockHeader>(tmp);
                }

                Blob blob;
                using (var tmp = new LimitedStream(file, header.datasize))
                {
                    blob = Serializer.Deserialize<Blob>(tmp);
                }

                if (blob.zlib_data == null)
                {
                    throw new NotSupportedException("I'm only handling zlib here!");
                }

                using (var ms = new MemoryStream(blob.zlib_data))
                using (var zlib = new ZLibStream(ms))
                {
                    // at this point I'm very unclear how the OSMHeader and OSMData are packed - it isn't clear
                    // NOTE(review): the PBF wiki page describes a `type` string on BlockHeader
                    // ("OSMHeader" / "OSMData"); presumably that selects the payload message - confirm.

                    // read this to the end, to check we can parse the zlib
                    int payloadLen = 0;
                    int chunk;
                    while ((chunk = zlib.Read(scratch, 0, scratch.Length)) > 0)
                    {
                        payloadLen += chunk;
                    }

                    if (payloadLen != blob.raw_size)
                    {
                        throw new FormatException("Screwed that up...");
                    }
                }

                blockCount++;
                Console.WriteLine("Read block " + blockCount.ToString());
            }

            Console.WriteLine("all done");
            Console.ReadLine();
        }
    }

    // Reverses the four bytes of a 32-bit value (little-endian <-> network byte order).
    private static uint SwapByteOrder(uint value)
    {
        return ((value & 0xFF) << 24)
             | ((value & 0xFF00) << 8)
             | ((value & 0xFF0000) >> 8)
             | ((value & 0xFF000000) >> 24);
    }
}

/// <summary>
/// Base class for forward-only, read-only streams. Subclasses supply
/// <see cref="ReadNextBlock"/>; this class keeps calling it until the request is
/// satisfied or the source is exhausted, and tracks the number of bytes returned.
/// </summary>
abstract class InputStream : Stream
{
    private long pos;

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes into <paramref name="buffer"/> starting
    /// at <paramref name="offset"/>. Returns the bytes read; a value that is not positive
    /// signals that no more data is available.
    /// </summary>
    protected abstract int ReadNextBlock(byte[] buffer, int offset, int count);

    /// <summary>
    /// Fills as much of the requested range as possible by repeatedly calling
    /// <see cref="ReadNextBlock"/>, stopping when the range is full or a call yields
    /// nothing. Returns the total number of bytes read (0 at end of data).
    /// </summary>
    public sealed override int Read(byte[] buffer, int offset, int count)
    {
        int bytesRead, totalRead = 0;
        while (count > 0 && (bytesRead = ReadNextBlock(buffer, offset, count)) > 0)
        {
            count -= bytesRead;
            offset += bytesRead;
            totalRead += bytesRead;
            pos += bytesRead;
        }

        return totalRead;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotImplementedException();
    }

    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Number of bytes handed out so far. Setting it to the current value is tolerated;
    /// any other value throws because the stream cannot seek.
    /// </summary>
    public override long Position
    {
        get { return pos; }
        set
        {
            if (pos != value)
            {
                throw new NotImplementedException();
            }
        }
    }

    public override long Length
    {
        get { throw new NotImplementedException(); }
    }

    /// <summary>
    /// No-op: nothing is ever buffered for writing, and flushing a read-only stream is
    /// valid, so this must not throw when a wrapper calls it.
    /// </summary>
    public override void Flush()
    {
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotImplementedException();
    }
}

/// <summary>
/// Exposes the decompressed side of a ZLIB.NET <c>ZInputStream</c> as a regular
/// read-only <see cref="Stream"/>.
/// </summary>
class ZLibStream : InputStream
{
    // uses ZLIB.NET: http://www.componentace.com/download/download.php?editionid=25
    private readonly ZInputStream reader; // seriously, why isn't this a stream?

    /// <summary>Wraps <paramref name="stream"/>, which holds the compressed bytes.</summary>
    public ZLibStream(Stream stream)
    {
        reader = new ZInputStream(stream);
    }

    /// <summary>
    /// Closes the wrapped reader. Cleanup lives here rather than in Close() so that
    /// Close(), Dispose() and using-blocks all reach it through the standard path.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            reader.Close();
        }

        base.Dispose(disposing);
    }

    protected override int ReadNextBlock(byte[] buffer, int offset, int count)
    {
        // OMG! reader.Read is the base-stream, reader.read is decompressed! yeuch
        return reader.read(buffer, offset, count);
    }
}

/// <summary>
/// Read-only window over another stream that yields at most a fixed number of bytes.
/// </summary>
// deliberately doesn't dispose the base-stream
class LimitedStream : InputStream
{
    private readonly Stream stream;
    private long remaining;

    /// <summary>
    /// Creates a window of <paramref name="length"/> bytes starting at the current
    /// position of <paramref name="stream"/>.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="stream"/> is not readable.</exception>
    public LimitedStream(Stream stream, long length)
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException("length");
        }

        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        if (!stream.CanRead)
        {
            throw new ArgumentException("stream");
        }

        this.stream = stream;
        this.remaining = length;
    }

    protected override int ReadNextBlock(byte[] buffer, int offset, int count)
    {
        // Never read past the window; once it is used up this asks for 0 bytes.
        if (count > remaining)
        {
            count = (int)remaining;
        }

        int bytesRead = stream.Read(buffer, offset, count);
        if (bytesRead > 0)
        {
            remaining -= bytesRead;
        }

        return bytesRead;
    }
}