From 89f1396fd1a400c09781bf8996f10c7e59e395ff Mon Sep 17 00:00:00 2001 From: jimblandy Date: Thu, 11 Feb 2010 22:44:01 +0000 Subject: [PATCH] Breakpad DWARF parser: Expand comments for ByteReader class. This is preparation for adding support for reading Linux C++ exception handling data's encoded pointers. The change should have no user-visible effect; it simply expands the comments for dwarf2reader::ByteReader, and regroups the member functions. a=jimblandy, r=nealsid git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@522 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/common/dwarf/bytereader.h | 165 +++++++++++++++++++++++++--------- 1 file changed, 122 insertions(+), 43 deletions(-) diff --git a/src/common/dwarf/bytereader.h b/src/common/dwarf/bytereader.h index ead61f0e..bda450d1 100644 --- a/src/common/dwarf/bytereader.h +++ b/src/common/dwarf/bytereader.h @@ -43,75 +43,154 @@ enum Endianness { ENDIANNESS_LITTLE }; -// Class that knows how to read both big endian and little endian -// numbers, for use in DWARF2/3 reader. -// Takes an endianness argument. -// To read addresses and offsets, SetAddressSize and SetOffsetSize -// must be called first. +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information. class ByteReader { public: - explicit ByteReader(enum Endianness endian); + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, and signed and + // unsigned LEB128 numbers. + explicit ByteReader(enum Endianness endianness); virtual ~ByteReader(); - // Set the address size to SIZE, which sets up the ReadAddress member - // so that it works. - void SetAddressSize(uint8 size); - - // Set the offset size to SIZE, which sets up the ReadOffset member - // so that it works. 
- void SetOffsetSize(uint8 size); - - // Return the current offset size - uint8 OffsetSize() const { return offset_size_; } - - // Return the current address size - uint8 AddressSize() const { return address_size_; } - // Read a single byte from BUFFER and return it as an unsigned 8 bit // number. uint8 ReadOneByte(const char* buffer) const; - // Read two bytes from BUFFER and return it as an unsigned 16 bit - // number. + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. uint16 ReadTwoBytes(const char* buffer) const; - // Read four bytes from BUFFER and return it as an unsigned 32 bit - // number. This function returns a uint64 so that it is compatible - // with ReadAddress and ReadOffset. The number it returns will - // never be outside the range of an unsigned 32 bit integer. + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64 so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. uint64 ReadFourBytes(const char* buffer) const; - // Read eight bytes from BUFFER and return it as an unsigned 64 bit - // number + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. uint64 ReadEightBytes(const char* buffer) const; // Read an unsigned LEB128 (Little Endian Base 128) number from - // BUFFER and return it as an unsigned 64 bit integer. LEN is set - // to the length read. Everybody seems to reinvent LEB128 as a - // variable size integer encoding, DWARF has had it for a long time. + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. 
+ // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break N into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; // Read a signed LEB128 number from BUFFER and return it as an - // signed 64 bit integer. LEN is set to the length read. + // signed 64 bit integer. Set LEN to the number of bytes read. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N & 0x7f (N in + // seven-bit two's complement). + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break N into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. int64 ReadSignedLEB128(const char* buffer, size_t* len) const; - // Read an offset from BUFFER and return it as an unsigned 64 bit - // integer. DWARF2/3 define offsets as either 4 or 8 bytes, - // generally depending on the amount of DWARF2/3 info present. - uint64 ReadOffset(const char* buffer) const; + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. 
(DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8 size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8 AddressSize() const { return address_size_; } // Read an address from BUFFER and return it as an unsigned 64 bit - // integer. DWARF2/3 allow addresses to be any size from 0-255 - // bytes currently. Internally we support 4 and 8 byte addresses, - // and will CHECK on anything else. + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. uint64 ReadAddress(const char* buffer) const; - // Read a DWARF2/3 initial length field from START, and report the - // length of the length field in LEN. Return the value of the length - // field. Set this reader's offset size as indicated by the length - // field's encoding. + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. 
+ // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xfffffff0, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - the 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. uint64 ReadInitialLength(const char* start, size_t* len); + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. 
+ uint64 ReadOffset(const char* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. + // A return value of 8 indicates that we are reading 64-bit DWARF. + uint8 OffsetSize() const { return offset_size_; } + + // Indicate that section offsets and lengths are SIZE bytes long. SIZE + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). + // Usually, you should not call this function yourself; instead, let a + // call to ReadInitialLength establish the data's offset size + // automatically. + void SetOffsetSize(uint8 size); + private: // Function pointer type for our address and offset readers.