Handle very large stack traces

The main motivation for this change is to handle very large stack
traces, normally the result of infinite recursion. This part is
actually fairly simple, relaxing a few self-imposed limits on how
many frames we can unwind and the max size for stack memory.

Relaxing these limits requires stricter and more consistent checks for
stack unwinding. There are a number of unwinding invariants that apply
to all the platforms:

1. stack pointer (and frame pointer) must be within the stack memory
   (frame pointer, if present, must point to the right frame too)
2. unwinding must monotonically increase SP
   (except for the first frame unwind, this must be a strict increase)
3. Instruction pointer (return address) must point to a valid location
4. stack pointer (and frame pointer) must be appropriately aligned

This change is focused on 2), which is enough to guarantee that the
unwinding doesn't get stuck in an infinite loop.

1) is implicitly validated as part of accessing the stack memory
   (explicit checks might be nice though).
4) is ABI specific and while it may be valuable in catching suspicious
   frames is not in the scope of this change.
3) is also an interesting check but thanks to just-in-time compilation
   it's more complex than just calling
   Stackwalker::InstructionAddressSeemsValid()
   and we don't want to drop parts of the callstack due to an overly
   conservative check.

Bug: chromium:735989

Change-Id: I9aaba77c7fd028942d77c87d51b5e6f94e136ddd
Reviewed-on: https://chromium-review.googlesource.com/563771
Reviewed-by: Mark Mentovai <mark@chromium.org>
Reviewed-by: Ivan Penkov <ivanpe@chromium.org>
This commit is contained in:
Leonard Mosescu 2017-07-11 12:26:50 -07:00
parent 5f112cb174
commit 01431c2f61
14 changed files with 108 additions and 81 deletions

View File

@ -257,8 +257,7 @@ class MinidumpMemoryRegion : public MinidumpObject,
bool hexdump_;
unsigned int hexdump_width_;
// The largest memory region that will be read from a minidump. The
// default is 1MB.
// The largest memory region that will be read from a minidump.
static uint32_t max_bytes_;
// Base address and size of the memory region, and its position in the

View File

@ -126,7 +126,15 @@ class Stackwalker {
// * This address is within a loaded module for which we have symbols,
// and falls inside a function in that module.
// Returns false otherwise.
bool InstructionAddressSeemsValid(uint64_t address);
bool InstructionAddressSeemsValid(uint64_t address) const;
// Checks whether we should stop the stack trace.
// (either we reached the end-of-stack or we detected a
// broken callstack invariant)
bool TerminateWalk(uint64_t caller_ip,
uint64_t caller_sp,
uint64_t callee_sp,
bool first_unwind) const;
// The default number of words to search through on the stack
// for a return address.
@ -217,6 +225,13 @@ class Stackwalker {
// the caller. |stack_scan_allowed| controls whether stack scanning is
// an allowable frame-recovery method, since it is desirable to be able to
// disable stack scanning in performance-critical use cases.
//
// CONSIDER: a way to differentiate between:
// - full stack traces
// - explicitly truncated traces (max_frames_)
// - stopping after max scanned frames
// - failed stack walk (breaking one of the stack walk invariants)
//
virtual StackFrame* GetCallerFrame(const CallStack* stack,
bool stack_scan_allowed) = 0;

View File

@ -132,7 +132,7 @@ TEST_F(MicrodumpProcessorTest, TestProcess_MissingSymbols) {
ASSERT_EQ("arm64", state.system_info()->cpu);
ASSERT_EQ("OS 64 VERSION INFO", state.system_info()->os_version);
ASSERT_EQ(1U, state.threads()->size());
ASSERT_EQ(12U, state.threads()->at(0)->frames()->size());
ASSERT_EQ(11U, state.threads()->at(0)->frames()->size());
ASSERT_EQ("",
state.threads()->at(0)->frames()->at(0)->function_name);
@ -205,7 +205,7 @@ TEST_F(MicrodumpProcessorTest, TestProcessX86) {
ASSERT_EQ("x86", state.system_info()->cpu);
ASSERT_EQ("asus/WW_Z00A/Z00A:5.0/LRX21V/2.19.40.22_20150627_5104_user:user/"
"release-keys", state.system_info()->os_version);
ASSERT_EQ(56U, state.threads()->at(0)->frames()->size());
ASSERT_EQ(17U, state.threads()->at(0)->frames()->size());
ASSERT_EQ("libc.so",
state.threads()->at(0)->frames()->at(0)->module->debug_file());
// TODO(mmandlis): Get symbols for the test X86 microdump and test function

View File

@ -1195,7 +1195,7 @@ bool MinidumpContext::CheckAgainstSystemInfo(uint32_t context_cpu_type) {
//
uint32_t MinidumpMemoryRegion::max_bytes_ = 2 * 1024 * 1024; // 2MB
uint32_t MinidumpMemoryRegion::max_bytes_ = 64 * 1024 * 1024; // 64MB
MinidumpMemoryRegion::MinidumpMemoryRegion(Minidump* minidump)

View File

@ -60,10 +60,15 @@ namespace google_breakpad {
const int Stackwalker::kRASearchWords = 40;
uint32_t Stackwalker::max_frames_ = 1024;
// This default is just a sanity check: a large enough value
// that allows capturing unbounded recursion traces, yet provides a
// guardrail against stack walking bugs. The stack walking invariants
// guarantee that the unwinding process is strictly monotonic and
// practically bounded by the size of the stack memory range.
uint32_t Stackwalker::max_frames_ = 1 << 20; // 1M
bool Stackwalker::max_frames_set_ = false;
uint32_t Stackwalker::max_frames_scanned_ = 1024;
uint32_t Stackwalker::max_frames_scanned_ = 1 << 14; // 16k
Stackwalker::Stackwalker(const SystemInfo* system_info,
MemoryRegion* memory,
@ -271,7 +276,33 @@ Stackwalker* Stackwalker::StackwalkerForCPU(
return cpu_stackwalker;
}
bool Stackwalker::InstructionAddressSeemsValid(uint64_t address) {
// CONSIDER: check stack alignment?
// Decides whether the stack walk should stop at the frame that was just
// recovered, either because the end of the stack was reached or because an
// unwinding invariant was broken.
//
// |caller_ip|    instruction address recovered for the caller frame
// |caller_sp|    stack pointer recovered for the caller frame
// |callee_sp|    stack pointer of the frame we unwound from
// |first_unwind| true when this is the first unwind step of the walk
//
// Returns true when the walk must be terminated, false when it may continue.
bool Stackwalker::TerminateWalk(uint64_t caller_ip,
                                uint64_t caller_sp,
                                uint64_t callee_sp,
                                bool first_unwind) const {
  // Any instruction address below 4k counts as end-of-stack.
  // (InstructionAddressSeemsValid() is deliberately not used here, since
  // JITted code has to be handled as well.)
  const bool ip_below_4k = caller_ip < (1 << 12);

  // NOTE: The stack address range is not checked here; it is checked
  // implicitly when the stack memory is accessed.

  // Unwinding has to move the stack pointer upwards. A caller SP below the
  // callee SP always breaks that rule. An unchanged SP is tolerated only on
  // the first unwind: on architectures that keep the return address in a
  // register, a leaf function may not move the stack pointer at all.
  const bool sp_moved_down = caller_sp < callee_sp;
  const bool sp_unchanged = caller_sp == callee_sp;

  return ip_below_4k || sp_moved_down || (!first_unwind && sp_unchanged);
}
bool Stackwalker::InstructionAddressSeemsValid(uint64_t address) const {
StackFrame frame;
frame.instruction = address;
StackFrameSymbolizer::SymbolizerResult symbolizer_result =

View File

@ -147,23 +147,6 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByCFIFrameInfo(
return frame.release();
}
// Reports whether the walk has reached end-of-stack.
//
// Two conditions are treated as end-of-stack:
//  - the caller's instruction address is 0, which is the end-of-stack marker;
//  - the caller's stack pointer is at a lower address than the callee's,
//    which is clearly incorrect and is rejected to enforce progress and
//    avoid infinite loops.
bool StackwalkerAMD64::IsEndOfStack(uint64_t caller_rip, uint64_t caller_rsp,
                                    uint64_t callee_rsp) {
  return caller_rip == 0 || caller_rsp < callee_rsp;
}
// Returns true if `ptr` is not in x86-64 canonical form.
// https://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
static bool is_non_canonical(uint64_t ptr) {
@ -173,7 +156,6 @@ static bool is_non_canonical(uint64_t ptr) {
StackFrameAMD64* StackwalkerAMD64::GetCallerByFramePointerRecovery(
const vector<StackFrame*>& frames) {
StackFrameAMD64* last_frame = static_cast<StackFrameAMD64*>(frames.back());
uint64_t last_rsp = last_frame->context.rsp;
uint64_t last_rbp = last_frame->context.rbp;
// Assume the presence of a frame pointer. This is not mandated by the
@ -208,10 +190,8 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByFramePointerRecovery(
return NULL;
}
// Simple sanity check that the stack is growing downwards as expected.
if (IsEndOfStack(caller_rip, caller_rsp, last_rsp) ||
caller_rbp < last_rbp) {
// Reached end-of-stack or stack is not growing downwards.
// Check that rbp is within the right frame
if (caller_rsp <= last_rbp || caller_rbp < caller_rsp) {
return NULL;
}
@ -327,9 +307,9 @@ StackFrame* StackwalkerAMD64::GetCallerFrame(const CallStack* stack,
new_frame->context.rbp = static_cast<uint32_t>(new_frame->context.rbp);
}
if (IsEndOfStack(new_frame->context.rip, new_frame->context.rsp,
last_frame->context.rsp)) {
// Reached end-of-stack.
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(new_frame->context.rip, new_frame->context.rsp,
last_frame->context.rsp, frames.size() == 1)) {
return NULL;
}

View File

@ -78,14 +78,6 @@ class StackwalkerAMD64 : public Stackwalker {
StackFrameAMD64* GetCallerByCFIFrameInfo(const vector<StackFrame*> &frames,
CFIFrameInfo* cfi_frame_info);
// Checks whether end-of-stack is reached. An instruction address of 0 is an
// end-of-stack marker. If the stack pointer of the caller is at a lower
// address than the stack pointer of the callee, then that's clearly incorrect
// and it is treated as end-of-stack to enforce progress and avoid infinite
// loops.
bool IsEndOfStack(uint64_t caller_rip, uint64_t caller_rsp,
uint64_t callee_rsp);
// Assumes a traditional frame layout where the frame pointer has not been
// omitted. The expectation is that caller's %rbp is pushed to the stack
// after the return address of the callee, and that the callee's %rsp can

View File

@ -267,17 +267,13 @@ StackFrame* StackwalkerARM::GetCallerFrame(const CallStack* stack,
if (!frame.get())
return NULL;
// An instruction address of zero marks the end of the stack.
if (frame->context.iregs[MD_CONTEXT_ARM_REG_PC] == 0)
return NULL;
// If the new stack pointer is at a lower address than the old, then
// that's clearly incorrect. Treat this as end-of-stack to enforce
// progress and avoid infinite loops.
if (frame->context.iregs[MD_CONTEXT_ARM_REG_SP]
< last_frame->context.iregs[MD_CONTEXT_ARM_REG_SP])
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(frame->context.iregs[MD_CONTEXT_ARM_REG_PC],
frame->context.iregs[MD_CONTEXT_ARM_REG_SP],
last_frame->context.iregs[MD_CONTEXT_ARM_REG_SP],
frames.size() == 1)) {
return NULL;
}
// The new frame's context's PC is the return address, which is one
// instruction past the instruction that caused us to arrive at the

View File

@ -252,16 +252,13 @@ StackFrame* StackwalkerARM64::GetCallerFrame(const CallStack* stack,
if (!frame.get())
return NULL;
// An instruction address of zero marks the end of the stack.
if (frame->context.iregs[MD_CONTEXT_ARM64_REG_PC] == 0)
return NULL;
// If the new stack pointer is at a lower address than the old, then
// that's clearly incorrect. Treat this as end-of-stack to enforce
// progress and avoid infinite loops.
if (frame->context.iregs[MD_CONTEXT_ARM64_REG_SP]
< last_frame->context.iregs[MD_CONTEXT_ARM64_REG_SP])
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(frame->context.iregs[MD_CONTEXT_ARM64_REG_PC],
frame->context.iregs[MD_CONTEXT_ARM64_REG_SP],
last_frame->context.iregs[MD_CONTEXT_ARM64_REG_SP],
frames.size() == 1)) {
return NULL;
}
// The new frame's context's PC is the return address, which is one
// instruction past the instruction that caused us to arrive at the callee.

View File

@ -272,16 +272,11 @@ StackFrame* StackwalkerMIPS::GetCallerFrame(const CallStack* stack,
return NULL;
}
// Treat an instruction address of 0 as end-of-stack.
if (new_frame->context.epc == 0) {
return NULL;
}
// If the new stack pointer is at a lower address than the old, then
// that's clearly incorrect. Treat this as end-of-stack to enforce
// progress and avoid infinite loops.
if (new_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP] <
last_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP]) {
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(new_frame->context.epc,
new_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP],
last_frame->context.iregs[MD_CONTEXT_MIPS_REG_SP],
frames.size() == 1)) {
return NULL;
}

View File

@ -130,6 +130,14 @@ StackFrame* StackwalkerPPC::GetCallerFrame(const CallStack* stack,
StackFramePPC::CONTEXT_VALID_GPR1;
frame->trust = StackFrame::FRAME_TRUST_FP;
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(instruction,
stack_pointer,
last_frame->context.gpr[1],
stack->frames()->size() == 1)) {
return NULL;
}
// frame->context.srr0 is the return address, which is one instruction
// past the branch that caused us to arrive at the callee. Set
// frame_ppc->instruction to four less than that. Since all ppc

View File

@ -121,6 +121,14 @@ StackFrame* StackwalkerPPC64::GetCallerFrame(const CallStack* stack,
StackFramePPC64::CONTEXT_VALID_GPR1;
frame->trust = StackFrame::FRAME_TRUST_FP;
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(instruction,
stack_pointer,
last_frame->context.gpr[1],
stack->frames()->size() == 1)) {
return NULL;
}
// frame->context.srr0 is the return address, which is one instruction
// past the branch that caused us to arrive at the callee. Set
// frame_ppc64->instruction to eight less than that. Since all ppc64

View File

@ -111,6 +111,14 @@ StackFrame* StackwalkerSPARC::GetCallerFrame(const CallStack* stack,
return NULL;
}
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(instruction,
stack_pointer,
last_frame->context.g_r[14],
stack->frames()->size() == 1)) {
return NULL;
}
StackFrameSPARC* frame = new StackFrameSPARC();
frame->context = last_frame->context;

View File

@ -659,15 +659,13 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack* stack,
if (!new_frame.get())
return NULL;
// Treat an instruction address of 0 as end-of-stack.
if (new_frame->context.eip == 0)
return NULL;
// If the new stack pointer is at a lower address than the old, then
// that's clearly incorrect. Treat this as end-of-stack to enforce
// progress and avoid infinite loops.
if (new_frame->context.esp <= last_frame->context.esp)
// Should we terminate the stack walk? (end-of-stack or broken invariant)
if (TerminateWalk(new_frame->context.eip,
new_frame->context.esp,
last_frame->context.esp,
frames.size() == 1)) {
return NULL;
}
// new_frame->context.eip is the return address, which is the instruction
// after the CALL that caused us to arrive at the callee. Set