Breakpad Linux dumper: Handle STABS-in-symbol-table, and line number records outside functions.

This patch addresses two differences between Linux and Macintosh OS X STABS
data:

- StabsReader assumes that the STABS entries follow the conventions for
  storing STABS data in object file sections (that is, .stabs and
  .stabstr), rather than in the object file's linker symbol table. On Mac
  OS X, STABS entries live in the Mach-O file's LC_SYMTAB load command,
  along with all the other linker symbols; they are not grouped into units
  by N_UNDF entries.

  This patch adds a boolean argument to the StabsReader constructor
  indicating whether the parser should treat N_UNDF entries as unit
  boundaries; this argument should be true on Linux, and false on Mac. The
  patch changes src/common/linux/dump_symbols.cc to pass this new argument.

- Mac OS X STABS place SLINE (line number) records immediately before the
  FUN record for the function to which they belong, and the values of such
  records are absolute, not relative to the function start.

  This patch extends the parser to queue up such records and report them to
  the handler when we do see the FUN record. The meaning of
  StabsHandler::Line remains unchanged; existing handlers do not need to be
  adjusted.

This patch also adds unit tests for the new parser behaviors.

a=jimblandy, r=mark


git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@587 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
jimblandy 2010-05-05 17:34:19 +00:00
parent d3e4bbb9c7
commit 775c6f7640
4 changed files with 152 additions and 21 deletions

View File

@ -142,12 +142,13 @@ static bool LoadStabs(const ElfW(Ehdr) *elf_header,
StabsToModule handler(module);
// Find the addresses of the STABS data, and create a STABS reader object.
// On Linux, STABS entries always have 32-bit values, regardless of the
// address size of the architecture whose code they're describing.
// address size of the architecture whose code they're describing, and
// the strings are always "unitized".
uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
stabstr, stabstr_section->sh_size,
big_endian, 4, &handler);
big_endian, 4, true, &handler);
// Read the STABS data, and do post-processing.
if (!reader.Process())
return false;

View File

@ -37,6 +37,8 @@
#include <stab.h>
#include <string.h>
using std::vector;
namespace google_breakpad {
StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
@ -61,11 +63,12 @@ void StabsReader::EntryIterator::Fetch() {
StabsReader::StabsReader(const uint8_t *stab, size_t stab_size,
const uint8_t *stabstr, size_t stabstr_size,
bool big_endian, size_t value_size,
bool big_endian, size_t value_size, bool unitized,
StabsHandler *handler)
: entries_(stab, stab_size),
strings_(stabstr, stabstr_size),
iterator_(&entries_, big_endian, value_size),
unitized_(unitized),
handler_(handler),
string_offset_(0),
next_cu_string_offset_(0),
@ -88,13 +91,14 @@ bool StabsReader::Process() {
if (iterator_->type == N_SO) {
if (! ProcessCompilationUnit())
return false;
} else if (iterator_->type == N_UNDF) {
// At the head of each compilation unit's entries there is an
// N_UNDF stab giving the number of symbols in the compilation
// unit, and the number of bytes that compilation unit's strings
// take up in the .stabstr section. Each CU's strings are
// separate; the n_strx values are offsets within the current
// CU's portion of the .stabstr section.
} else if (iterator_->type == N_UNDF && unitized_) {
// In unitized STABS (including Linux STABS, and pretty much anything
// else that puts STABS data in sections), at the head of each
// compilation unit's entries there is an N_UNDF stab giving the
// number of symbols in the compilation unit, and the number of bytes
// that compilation unit's strings take up in the .stabstr section.
// Each CU's strings are separate; the n_strx values are offsets
// within the current CU's portion of the .stabstr section.
//
// As an optimization, the GNU linker combines all the
// compilation units into one, with a single N_UNDF at the
@ -157,9 +161,26 @@ bool StabsReader::ProcessCompilationUnit() {
if (iterator_->type == N_FUN) {
if (! ProcessFunction())
return false;
} else
} else if (iterator_->type == N_SLINE) {
// Mac OS X STABS place SLINE records before functions.
Line line;
// The value of an N_SLINE entry that appears outside a function is
// the absolute address of the line.
line.address = iterator_->value;
line.filename = current_source_file_;
// The n_desc of a N_SLINE entry is the line number. It's a
// signed 16-bit field; line numbers from 32768 to 65535 are
// stored as n-65536.
line.number = (uint16_t) iterator_->descriptor;
queued_lines_.push_back(line);
++iterator_;
} else if (iterator_->type == N_SOL) {
current_source_file_ = SymbolString();
++iterator_;
} else {
// Ignore anything else.
++iterator_;
}
}
// An N_SO with an empty name indicates the end of the compilation
@ -177,6 +198,8 @@ bool StabsReader::ProcessCompilationUnit() {
if (! handler_->EndCompilationUnit(ending_address))
return false;
queued_lines_.clear();
return true;
}
@ -196,6 +219,14 @@ bool StabsReader::ProcessFunction() {
return false;
++iterator_;
// If there were any SLINE records given before the function, report them now.
for (vector<Line>::const_iterator it = queued_lines_.begin();
it != queued_lines_.end(); it++) {
if (!handler_->Line(it->address, it->filename, it->number))
return false;
}
queued_lines_.clear();
while (!iterator_->at_end) {
if (iterator_->type == N_SO || iterator_->type == N_FUN)
break;

View File

@ -52,8 +52,12 @@
#ifdef HAVE_A_OUT_H
#include <a.out.h>
#endif
#ifdef HAVE_MACH_O_NLIST_H
#include <mach-o/nlist.h>
#endif
#include <string>
#include <vector>
#include "common/byte_cursor.h"
@ -71,14 +75,22 @@ class StabsReader {
//
// BIG_ENDIAN should be true if the entries in the .stab section are in
// big-endian form, or false if they are in little-endian form.
//
// VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
// field in each entry in bytes.
//
// UNITIZED should be true if the STABS data is stored in units with
// N_UNDF headers. This is usually the case for STABS stored in sections,
// like .stab/.stabstr, and usually not the case for STABS stored in the
// actual symbol table; UNITIZED should be true when parsing Linux stabs,
// false when parsing Mac OS X STABS. For details, see:
// http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
//
// Note that, in ELF, the .stabstr section should be found using the
// 'sh_link' field of the .stab section header, not by name.
StabsReader(const uint8_t *stab, size_t stab_size,
const uint8_t *stabstr, size_t stabstr_size,
bool big_endian, size_t value_size,
bool big_endian, size_t value_size, bool unitized,
StabsHandler *handler);
// Process the STABS data, calling the handler's member functions to
@ -159,6 +171,13 @@ class StabsReader {
Entry entry_;
};
// A source line, saved to be reported later. Records of this type are
// queued for N_SLINE entries that appear before the N_FUN entry of the
// function they belong to, and are handed to the handler's Line member
// function once that function has been started.
struct Line {
// The line's address, copied from the N_SLINE entry's value field. For
// entries seen outside a function this is an absolute address.
uint64_t address;
// The source file name that was current when the entry was read.
// NOTE(review): this is a borrowed pointer; presumably it points into the
// reader's string data and must not outlive it -- confirm.
const char *filename;
// The source line number, taken from the entry's descriptor field
// interpreted as an unsigned 16-bit quantity.
int number;
};
// Return the name of the current symbol.
const char *SymbolString();
@ -179,6 +198,10 @@ class StabsReader {
// The iterator walking the STABS entries.
EntryIterator iterator_;
// True if the data is "unitized"; see the explanation in the comment for
// StabsReader::StabsReader.
bool unitized_;
StabsHandler *handler_;
// The offset of the current compilation unit's strings within stabstr_.
@ -190,6 +213,11 @@ class StabsReader {
// The current source file name.
const char *current_source_file_;
// Mac OS X STABS place SLINE records before functions; we accumulate a
// vector of these until we see the FUN record, and then report them
// after the StartFunction call.
std::vector<Line> queued_lines_;
};
// Consumer-provided callback structure for the STABS reader. Clients

View File

@ -31,17 +31,18 @@
// stabs_reader_unittest.cc: Unit tests for google_breakpad::StabsReader.
#include <cassert>
#include <cerrno>
#include <cstdarg>
#include <cstdlib>
#include <cstring>
#include <assert.h>
#include <errno.h>
#include <stab.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <stab.h>
#include "breakpad_googletest_includes.h"
#include "common/stabs_reader.h"
@ -225,7 +226,7 @@ class MockStabsReaderHandler: public StabsHandler {
};
struct StabsFixture {
StabsFixture() : stabs(&strings) { }
StabsFixture() : stabs(&strings), unitized(true) { }
// Create a StabsReader to parse the mock stabs data in stabs and
// strings, and pass the parsed information to mock_handler. Use the
@ -244,12 +245,14 @@ struct StabsFixture {
stabs_contents.size(),
reinterpret_cast<const uint8_t *>(stabstr_contents.data()),
stabstr_contents.size(),
stabs.endianness() == kBigEndian, stabs.value_size(), &mock_handler);
stabs.endianness() == kBigEndian, stabs.value_size(), unitized,
&mock_handler);
return reader.Process();
}
StringAssembler strings;
StabsAssembler stabs;
bool unitized;
MockStabsReaderHandler mock_handler;
};
@ -401,7 +404,10 @@ TEST_F(Stabs, NoCUEnd) {
ASSERT_TRUE(ApplyHandlerToMockStabsData());
}
TEST_F(Stabs, MultipleCUs) {
// On systems that store STABS in sections, string offsets are relative to
// the beginning of that compilation unit's strings, marked with N_UNDF
// symbols; see the comments for StabsReader::StabsReader.
TEST_F(Stabs, Unitized) {
stabs.set_endianness(kBigEndian);
stabs.set_value_size(4);
stabs
@ -441,6 +447,32 @@ TEST_F(Stabs, MultipleCUs) {
ASSERT_TRUE(ApplyHandlerToMockStabsData());
}
// On systems that store STABS entries in the real symbol table, the N_UNDF
// entries have no special meaning, and shouldn't mess up the string
// indices.
//
// This test builds a reader with 'unitized' set to false, feeds it two
// N_UNDF entries followed by a single compilation unit, and checks that
// the compilation unit's name is still resolved correctly.
TEST_F(Stabs, NonUnitized) {
stabs.set_endianness(kLittleEndian);
stabs.set_value_size(4);
// Tell the fixture to construct the StabsReader in non-unitized mode.
unitized = false;
stabs
// Two N_UNDF entries carrying arbitrary values. A unitized reader would
// treat these as compilation unit headers; here they must be ignored.
.Stab(N_UNDF, 21, 11551, 0x9bad2b2e, "")
.Stab(N_UNDF, 21, 11551, 0x9bad2b2e, "")
// Start of the compilation unit, named "Tanzania".
.Stab(N_SO, 71, 45139, 0x11a97352, "Tanzania")
// An N_SO with an empty name ends the compilation unit.
.Stab(N_SO, 221, 41976, 0x21a97352, "");
{
// The handler calls below must arrive in exactly this order.
InSequence s;
// No build directory entry precedes the unit, so the third argument is
// expected to be NULL.
EXPECT_CALL(mock_handler,
StartCompilationUnit(StrEq("Tanzania"),
0x11a97352, NULL))
.WillOnce(Return(true));
// The ending address is the value of the terminating N_SO entry.
EXPECT_CALL(mock_handler, EndCompilationUnit(0x21a97352))
.WillOnce(Return(true));
}
ASSERT_TRUE(ApplyHandlerToMockStabsData());
}
TEST_F(Stabs, FunctionEnd) {
stabs.set_endianness(kLittleEndian);
stabs.set_value_size(8);
@ -484,6 +516,45 @@ TEST_F(Stabs, FunctionEnd) {
ASSERT_TRUE(ApplyHandlerToMockStabsData());
}
// On Mac OS X, SLINE records can appear before the FUN stab to which they
// belong, and their values are absolute addresses, not offsets.
//
// This test places two N_SLINE entries ahead of a function's N_FUN entry
// and checks that the reader reports them to the handler only after
// StartFunction, in their original order and with unmodified addresses.
TEST_F(Stabs, LeadingLine) {
stabs.set_endianness(kBigEndian);
stabs.set_value_size(4);
stabs
// Build directory, then the compilation unit's own name.
.Stab(N_SO, 179, 27357, 0x8adabc15, "build directory/")
.Stab(N_SO, 52, 53058, 0x4c7e3bf4, "compilation unit")
// Sets the source file name attached to the line records that follow.
.Stab(N_SOL, 165, 12086, 0x6a797ca3, "source file name")
// Line records that precede their function. The second descriptor,
// 43802, exceeds 32767, so it also exercises the handling of line
// numbers that do not fit in a signed 16-bit field.
.Stab(N_SLINE, 229, 20015, 0x4cb3d7e0, "")
.Stab(N_SLINE, 89, 43802, 0x4cba8b88, "")
// The function the line records belong to.
.Stab(N_FUN, 251, 51639, 0xce1b98fa, "rutabaga")
// An N_FUN with an empty name ends the function; the expected
// EndFunction address below is the function's start plus this value.
.Stab(N_FUN, 218, 16113, 0x5798, "")
// An N_SO with an empty name ends the compilation unit.
.Stab(N_SO, 52, 53058, 0xd4af4415, "");
{
// The handler calls below must arrive in exactly this order.
InSequence s;
EXPECT_CALL(mock_handler,
StartCompilationUnit(StrEq("compilation unit"),
0x4c7e3bf4, StrEq("build directory/")))
.WillOnce(Return(true));
// StartFunction must come before the queued line records are reported.
EXPECT_CALL(mock_handler,
StartFunction(Eq("rutabaga"), 0xce1b98fa))
.WillOnce(Return(true));
// Line addresses are passed through as given, not offset by the
// function's starting address.
EXPECT_CALL(mock_handler,
Line(0x4cb3d7e0, StrEq("source file name"), 20015))
.WillOnce(Return(true));
EXPECT_CALL(mock_handler,
Line(0x4cba8b88, StrEq("source file name"), 43802))
.WillOnce(Return(true));
EXPECT_CALL(mock_handler, EndFunction(0xce1b98fa + 0x5798))
.WillOnce(Return(true));
EXPECT_CALL(mock_handler, EndCompilationUnit(0xd4af4415))
.WillOnce(Return(true));
}
ASSERT_TRUE(ApplyHandlerToMockStabsData());
}
// name duplication
} // anonymous namespace