Breakpad DWARF parser: Expand comments for ByteReader class.

This is preparation for adding support for reading Linux C++ exception handling data's encoded pointers. The change should have no user-visible effect; it simply expands the comments for dwarf2reader::ByteReader, and regroups the member functions. a=jimblandy, r=nealsid git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@522 4c0a9323-5329-0410-9bdc-e9ce6186880e
2024-11-24 17:25:40 +01:00 · 2010-02-11 22:44:01 +00:00 · 2010-02-11 22:44:01 +00:00 · 89f1396fd1
commit 89f1396fd1
parent 71f7580891
1 changed files with 122 additions and 43 deletions
--- a/src/common/dwarf/bytereader.h
+++ b/src/common/dwarf/bytereader.h
@@ -43,75 +43,154 @@ enum Endianness {
  ENDIANNESS_LITTLE
 };

-// Class that knows how to read both big endian and little endian
-// numbers, for use in DWARF2/3 reader.
-// Takes an endianness argument.
-// To read addresses and offsets, SetAddressSize and SetOffsetSize
-// must be called first.
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information.
 class ByteReader {
 public:
-  explicit ByteReader(enum Endianness endian);
+  // Construct a ByteReader capable of reading one-, two-, four-, and
+  // eight-byte values according to ENDIANNESS, absolute machine-sized
+  // addresses, DWARF-style "initial length" values, and signed and
+  // unsigned LEB128 numbers.
+  explicit ByteReader(enum Endianness endianness);
  virtual ~ByteReader();

-  // Set the address size to SIZE, which sets up the ReadAddress member
-  // so that it works.
-  void SetAddressSize(uint8 size);
-
-  // Set the offset size to SIZE, which sets up the ReadOffset member
-  // so that it works.
-  void SetOffsetSize(uint8 size);
-
-  // Return the current offset size
-  uint8 OffsetSize() const { return offset_size_; }
-
-  // Return the current address size
-  uint8 AddressSize() const { return address_size_; }
-
  // Read a single byte from BUFFER and return it as an unsigned 8 bit
  // number.
  uint8 ReadOneByte(const char* buffer) const;

-  // Read two bytes from BUFFER and return it as an unsigned 16 bit
-  // number.
+  // Read two bytes from BUFFER and return them as an unsigned 16 bit
+  // number, using this ByteReader's endianness.
  uint16 ReadTwoBytes(const char* buffer) const;

-  // Read four bytes from BUFFER and return it as an unsigned 32 bit
-  // number.  This function returns a uint64 so that it is compatible
-  // with ReadAddress and ReadOffset.  The number it returns will
-  // never be outside the range of an unsigned 32 bit integer.
+  // Read four bytes from BUFFER and return them as an unsigned 32 bit
+  // number, using this ByteReader's endianness. This function returns
+  // a uint64 so that it is compatible with ReadAddress and
+  // ReadOffset. The number it returns will never be outside the range
+  // of an unsigned 32 bit integer.
  uint64 ReadFourBytes(const char* buffer) const;

-  // Read eight bytes from BUFFER and return it as an unsigned 64 bit
-  // number
+  // Read eight bytes from BUFFER and return them as an unsigned 64
+  // bit number, using this ByteReader's endianness.
  uint64 ReadEightBytes(const char* buffer) const;

  // Read an unsigned LEB128 (Little Endian Base 128) number from
-  // BUFFER and return it as an unsigned 64 bit integer.  LEN is set
-  // to the length read.  Everybody seems to reinvent LEB128 as a
-  // variable size integer encoding, DWARF has had it for a long time.
+  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
+  // the number of bytes read.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  // 
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // In other words, we break N into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;

  // Read a signed LEB128 number from BUFFER and return it as a
-  // signed 64 bit integer.  LEN is set to the length read.
+  // signed 64 bit integer. Set LEN to the number of bytes read.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte holding N & 0x7f, the low
+  //   seven bits of N in two's complement.
+  // 
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // In other words, we break N into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;

-  // Read an offset from BUFFER and return it as an unsigned 64 bit
-  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
-  // generally depending on the amount of DWARF2/3 info present.
-  uint64 ReadOffset(const char* buffer) const;
+  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+  // must be either 4 or 8. (DWARF allows addresses to be any number of
+  // bytes in length from 1 to 255, but we only support 32- and 64-bit
+  // addresses at the moment.) You must call this before using the
+  // ReadAddress member function.
+  //
+  // For data in a .debug_info section, or something that .debug_info
+  // refers to like line number or macro data, the compilation unit
+  // header's address_size field indicates the address size to use. Call
+  // frame information doesn't indicate its address size (a shortcoming of
+  // the spec); you must supply the appropriate size based on the
+  // architecture of the target machine.
+  void SetAddressSize(uint8 size);
+
+  // Return the current address size, in bytes. This is either 4,
+  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+  uint8 AddressSize() const { return address_size_; }

  // Read an address from BUFFER and return it as an unsigned 64 bit
-  // integer.  DWARF2/3 allow addresses to be any size from 0-255
-  // bytes currently.  Internally we support 4 and 8 byte addresses,
-  // and will CHECK on anything else.
+  // integer, respecting this ByteReader's endianness and address size. You
+  // must call SetAddressSize before calling this function.
  uint64 ReadAddress(const char* buffer) const;

-  // Read a DWARF2/3 initial length field from START, and report the
-  // length of the length field in LEN. Return the value of the length
-  // field. Set this reader's offset size as indicated by the length
-  // field's encoding.
+  // DWARF actually defines two slightly different formats: 32-bit DWARF
+  // and 64-bit DWARF. This is *not* related to the size of registers or
+  // addresses on the target machine; it refers only to the size of section
+  // offsets and data lengths appearing in the DWARF data. One only needs
+  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+  // debugging data itself is very large.
+  //
+  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+  // compilation unit and call frame information entry begins with an
+  // "initial length" field, which, in addition to giving the length of the
+  // data, also indicates the size of section offsets and lengths appearing
+  // in that data. The ReadInitialLength member function, below, reads an
+  // initial length and sets the ByteReader's offset size as a side effect.
+  // Thus, in the normal process of reading DWARF data, the appropriate
+  // offset size is set automatically. So, you should only need to call
+  // SetOffsetSize if you are using the same ByteReader to jump from the
+  // midst of one block of DWARF data into another.
+
+  // Read a DWARF "initial length" field from START, and return it as
+  // an unsigned 64 bit integer, respecting this ByteReader's
+  // endianness. Set *LEN to the length of the initial length in
+  // bytes, either four or twelve. As a side effect, set this
+  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+  // initial length) or 8 (if we see a 64-bit DWARF initial length).
+  //
+  // A DWARF initial length is either:
+  //
+  // - a byte count stored as an unsigned 32-bit value less than
+  //   0xffffff00, indicating that the data whose length is being
+  //   measured uses the 32-bit DWARF format, or
+  //
+  // - the 32-bit value 0xffffffff, followed by a 64-bit byte count,
+  //   indicating that the data whose length is being measured uses
+  //   the 64-bit DWARF format.
  uint64 ReadInitialLength(const char* start, size_t* len);

+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+  // long. You must call ReadInitialLength or SetOffsetSize before calling
+  // this function; see the comments above for details.
+  uint64 ReadOffset(const char* buffer) const;
+
+  // Return the current offset size, in bytes.
+  // A return value of 4 indicates that we are reading 32-bit DWARF.
+  // A return value of 8 indicates that we are reading 64-bit DWARF.
+  uint8 OffsetSize() const { return offset_size_; }
+
+  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+  // Usually, you should not call this function yourself; instead, let a
+  // call to ReadInitialLength establish the data's offset size
+  // automatically.
+  void SetOffsetSize(uint8 size);
+
 private:

  // Function pointer type for our address and offset readers.