Add and handle new dwarf5 string-related forms.

Adding the new forms by type and processing should avoid the problems with 0c0e24f709, where new forms weren't handled in switch statements, breaking the build. Testing this should follow the testing for DW_FORM_GNU_str_index, very closely, but there doesn't appear to be any tests for that, or even DW_FORM_strp. Change-Id: I609d56b1dc879971bfef1070f063f8457fec6017 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/2233839 Reviewed-by: Mike Frysinger <vapier@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org>
2024-11-28 01:54:14 +01:00 · 2020-06-22 16:38:16 -07:00 · 2020-06-22 16:38:16 -07:00 · feb2dca989
commit feb2dca989
parent 2757a2c9c8
3 changed files with 119 additions and 31 deletions
--- a/src/common/dwarf/dwarf2enums.h
+++ b/src/common/dwarf/dwarf2enums.h
@ -149,7 +149,21 @@ enum DwarfForm {
  DW_FORM_sec_offset = 0x17,
  DW_FORM_exprloc = 0x18,
  DW_FORM_flag_present = 0x19,
+
+  // Added in DWARF 5:
+  DW_FORM_strx = 0x1a,
+  DW_FORM_strp_sup = 0x1d,
+  DW_FORM_line_strp = 0x1f,
+
+  // DWARF 4, but value out of order.
  DW_FORM_ref_sig8 = 0x20,
+
+  // Added in DWARF 5:
+  DW_FORM_strx1 = 0x25,
+  DW_FORM_strx2 = 0x26,
+  DW_FORM_strx3 = 0x27,
+  DW_FORM_strx4 = 0x28,
+
  // Extensions for Fission.  See http://gcc.gnu.org/wiki/DebugFission.
  DW_FORM_GNU_addr_index = 0x1f01,
  DW_FORM_GNU_str_index = 0x1f02
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@ -33,7 +33,6 @@

 #include "common/dwarf/dwarf2reader.h"

-#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@ -60,6 +59,7 @@ CompilationUnit::CompilationUnit(const string& path,
    : path_(path), offset_from_section_start_(offset), reader_(reader),
      sections_(sections), handler_(handler), abbrevs_(),
      string_buffer_(NULL), string_buffer_length_(0),
+      line_string_buffer_(NULL), line_string_buffer_length_(0),
      str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
      addr_buffer_(NULL), addr_buffer_length_(0),
      is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
@ -99,12 +99,8 @@ void CompilationUnit::ReadAbbrevs() {
  if (abbrevs_)
    return;

-  // First get the debug_abbrev section.  ".debug_abbrev" is the name
-  // recommended in the DWARF spec, and used on Linux;
-  // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
-  SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
-  if (iter == sections_.end())
-    iter = sections_.find("__debug_abbrev");
+  // First get the debug_abbrev section.
+  SectionMap::const_iterator iter = GetSectionByName(".debug_abbrev");
  assert(iter != sections_.end());

  abbrevs_ = new std::vector<Abbrev>;
@ -189,12 +185,17 @@ const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
    case DW_FORM_data1:
    case DW_FORM_flag:
    case DW_FORM_ref1:
+    case DW_FORM_strx1:
      return start + 1;
    case DW_FORM_ref2:
    case DW_FORM_data2:
+    case DW_FORM_strx2:
      return start + 2;
+    case DW_FORM_strx3:
+      return start + 3;
    case DW_FORM_ref4:
    case DW_FORM_data4:
+    case DW_FORM_strx4:
      return start + 4;
    case DW_FORM_ref8:
    case DW_FORM_data8:
@ -204,6 +205,7 @@ const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
      return start + strlen(reinterpret_cast<const char *>(start)) + 1;
    case DW_FORM_udata:
    case DW_FORM_ref_udata:
+    case DW_FORM_strx:
    case DW_FORM_GNU_str_index:
    case DW_FORM_GNU_addr_index:
      reader_->ReadUnsignedLEB128(start, &len);
@ -237,6 +239,8 @@ const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
      return start + size + len;
    }
    case DW_FORM_strp:
+    case DW_FORM_line_strp:
+    case DW_FORM_strp_sup:
    case DW_FORM_sec_offset:
      return start + reader_->OffsetSize();
  }
@ -284,12 +288,8 @@ void CompilationUnit::ReadHeader() {
 }

 uint64_t CompilationUnit::Start() {
-  // First get the debug_info section.  ".debug_info" is the name
-  // recommended in the DWARF spec, and used on Linux; "__debug_info"
-  // is the name used in Mac OS X Mach-O files.
-  SectionMap::const_iterator iter = sections_.find(".debug_info");
-  if (iter == sections_.end())
-    iter = sections_.find("__debug_info");
+  // First get the debug_info section.
+  SectionMap::const_iterator iter = GetSectionByName(".debug_info");
  assert(iter != sections_.end());

  // Set up our buffer
@ -319,26 +319,29 @@ uint64_t CompilationUnit::Start() {
  // Otherwise, continue by reading our abbreviation entries.
  ReadAbbrevs();

-  // Set the string section if we have one.  ".debug_str" is the name
-  // recommended in the DWARF spec, and used on Linux; "__debug_str"
-  // is the name used in Mac OS X Mach-O files.
-  iter = sections_.find(".debug_str");
-  if (iter == sections_.end())
-    iter = sections_.find("__debug_str");
+  // Set the string section if we have one.
+  iter = GetSectionByName(".debug_str");
  if (iter != sections_.end()) {
    string_buffer_ = iter->second.first;
    string_buffer_length_ = iter->second.second;
  }

+  // Set the line string section if we have one.
+  iter = GetSectionByName(".debug_line_str");
+  if (iter != sections_.end()) {
+    line_string_buffer_ = iter->second.first;
+    line_string_buffer_length_ = iter->second.second;
+  }
+
  // Set the string offsets section if we have one.
-  iter = sections_.find(".debug_str_offsets");
+  iter = GetSectionByName(".debug_str_offsets");
  if (iter != sections_.end()) {
    str_offsets_buffer_ = iter->second.first;
    str_offsets_buffer_length_ = iter->second.second;
  }

  // Set the address section if we have one.
-  iter = sections_.find(".debug_addr");
+  iter = GetSectionByName(".debug_addr");
  if (iter != sections_.end()) {
    addr_buffer_ = iter->second.first;
    addr_buffer_length_ = iter->second.second;
@ -358,6 +361,20 @@ uint64_t CompilationUnit::Start() {
  return ourlength;
 }

+void CompilationUnit::ProcessFormStringIndex(
+    uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
+    uint64_t str_index) {
+  const uint8_t* offset_ptr =
+      str_offsets_buffer_ + str_index * reader_->OffsetSize();
+  const uint64_t offset = reader_->ReadOffset(offset_ptr);
+  if (offset >= string_buffer_length_) {
+    return;
+  }
+
+  const char* str = reinterpret_cast<const char*>(string_buffer_) + offset;
+  ProcessAttributeString(dieoffset, attr, form, str);
+}
+
 // If one really wanted, you could merge SkipAttribute and
 // ProcessAttribute
 // This is all boring data manipulation and calling of the handler.
@ -498,21 +515,51 @@ const uint8_t *CompilationUnit::ProcessAttribute(
      ProcessAttributeString(dieoffset, attr, form, str);
      return start + reader_->OffsetSize();
    }
+    case DW_FORM_line_strp: {
+      assert(line_string_buffer_ != NULL);

+      const uint64_t offset = reader_->ReadOffset(start);
+      assert(line_string_buffer_ + offset <
+             line_string_buffer_ + line_string_buffer_length_);
+
+      const char* str =
+          reinterpret_cast<const char*>(line_string_buffer_ + offset);
+      ProcessAttributeString(dieoffset, attr, form, str);
+      return start + reader_->OffsetSize();
+    }
+    case DW_FORM_strp_sup:
+      // No support currently for suplementary object files.
+      fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n");
+      return start + 4;
+
+    case DW_FORM_strx:
    case DW_FORM_GNU_str_index: {
      uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
-      const uint8_t* offset_ptr =
-          str_offsets_buffer_ + str_index * reader_->OffsetSize();
-      const uint64_t offset = reader_->ReadOffset(offset_ptr);
-      if (offset >= string_buffer_length_) {
-        return NULL;
+      ProcessFormStringIndex(dieoffset, attr, form, str_index);
+      return start + len;
+    }
+    case DW_FORM_strx1: {
+      uint64_t str_index = reader_->ReadOneByte(start);
+      ProcessFormStringIndex(dieoffset, attr, form, str_index);
+      return start + 1;
+    }
+    case DW_FORM_strx2: {
+      uint64_t str_index = reader_->ReadTwoBytes(start);
+      ProcessFormStringIndex(dieoffset, attr, form, str_index);
+      return start + 2;
+    }
+    case DW_FORM_strx3: {
+      uint64_t str_index = reader_->ReadTwoBytes(start);
+      str_index *= reader_->ReadOneByte(start + 2);
+      ProcessFormStringIndex(dieoffset, attr, form, str_index);
+      return start + 3;
+    }
+    case DW_FORM_strx4: {
+      uint64_t str_index = reader_->ReadFourBytes(start);
+      ProcessFormStringIndex(dieoffset, attr, form, str_index);
+      return start + 4;
    }

-      const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
-      ProcessAttributeString(dieoffset, attr, form, str);
-      return start + len;
-      break;
-    }
    case DW_FORM_GNU_addr_index: {
      uint64_t addr_index = reader_->ReadUnsignedLEB128(start, &len);
      const uint8_t* addr_ptr =
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@ -40,6 +40,7 @@
 #ifndef COMMON_DWARF_DWARF2READER_H__
 #define COMMON_DWARF_DWARF2READER_H__

+#include <assert.h>
 #include <stdint.h>

 #include <list>
@ -475,6 +476,14 @@ class CompilationUnit {
    handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
  }

+  // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx,
+  // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4.
+  // Retrieves the data and calls through to ProcessAttributeString.
+  void ProcessFormStringIndex(uint64_t offset,
+                              enum DwarfAttribute attr,
+                              enum DwarfForm form,
+                              uint64_t str_index);
+
  // Called when we have an attribute with string data to give to
  // our handler.  The attribute is for the DIE at OFFSET from the
  // beginning of compilation unit, has a name of ATTR, a form of
@ -508,6 +517,20 @@ class CompilationUnit {
  void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
                                SectionMap* sections);

+  // Abstract away the difference between elf, mach-o, and Mac OS section names.
+  // Elf-names use ".section_name, others use "__section_name".  Pass "name" in
+  // the elf form, ".section_name".
+  const SectionMap::const_iterator GetSectionByName(const char *name) {
+    assert(name[0] == '.');
+    auto iter = sections_.find(name);
+    if (iter != sections_.end())
+      return iter;
+    std::string macho_name("__");
+    macho_name += name + 1;
+    iter = sections_.find(macho_name);
+    return iter;
+  }
+
  // Path of the file containing the debug information.
  const string path_;

@ -542,6 +565,10 @@ class CompilationUnit {
  const uint8_t *string_buffer_;
  uint64_t string_buffer_length_;

+  // Similarly for .debug_line_string.
+  const uint8_t* line_string_buffer_;
+  uint64_t line_string_buffer_length_;
+
  // String offsets section buffer and length, if we have a string offsets
  // section (.debug_str_offsets or .debug_str_offsets.dwo).
  const uint8_t* str_offsets_buffer_;