diff --git a/src/processor/stackwalker_x86.cc b/src/processor/stackwalker_x86.cc index f24667c9..6ea18319 100644 --- a/src/processor/stackwalker_x86.cc +++ b/src/processor/stackwalker_x86.cc @@ -36,13 +36,14 @@ #include "processor/postfix_evaluator-inl.h" -#include "processor/stackwalker_x86.h" #include "google_breakpad/processor/call_stack.h" #include "google_breakpad/processor/code_modules.h" #include "google_breakpad/processor/memory_region.h" -#include "google_breakpad/processor/stack_frame_cpu.h" #include "google_breakpad/processor/source_line_resolver_interface.h" +#include "google_breakpad/processor/stack_frame_cpu.h" #include "processor/logging.h" +#include "processor/scoped_ptr.h" +#include "processor/stackwalker_x86.h" #include "processor/windows_frame_info.h" namespace google_breakpad { @@ -72,7 +73,7 @@ StackFrameX86::~StackFrameX86() { windows_frame_info = NULL; } -StackFrame* StackwalkerX86::GetContextFrame() { +StackFrame *StackwalkerX86::GetContextFrame() { if (!context_ || !memory_) { BPLOG(ERROR) << "Can't get context frame without context or memory"; return NULL; @@ -90,23 +91,23 @@ StackFrame* StackwalkerX86::GetContextFrame() { return frame; } - -StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { - if (!memory_ || !stack) { - BPLOG(ERROR) << "Can't get caller frame without memory or stack"; - return NULL; - } +StackFrameX86 *StackwalkerX86::GetCallerByWindowsFrameInfo( + const vector &frames, + WindowsFrameInfo *last_frame_info) { StackFrameX86::FrameTrust trust = StackFrameX86::FRAME_TRUST_NONE; - StackFrameX86 *last_frame = static_cast( - stack->frames()->back()); - WindowsFrameInfo *last_frame_info - = resolver_->FindWindowsFrameInfo(last_frame); + StackFrameX86 *last_frame = static_cast(frames.back()); // Save the stack walking info we found, in case we need it later to // find the callee of the frame we're constructing now. 
last_frame->windows_frame_info = last_frame_info; + // This function only covers the full STACK WIN case. If + // last_frame_info is VALID_PARAMETER_SIZE-only, then we should + // assume the traditional frame format or use some other strategy. + if (last_frame_info->valid != WindowsFrameInfo::VALID_ALL) + return NULL; + // This stackwalker sets each frame's %esp to its value immediately prior // to the CALL into the callee. This means that %esp points to the last // callee argument pushed onto the stack, which may not be where %esp points @@ -139,12 +140,11 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { // are unknown, 0 is also used in that case. When that happens, it should // be possible to walk to the next frame without reference to %esp. - int frames_already_walked = stack->frames()->size(); u_int32_t last_frame_callee_parameter_size = 0; + int frames_already_walked = frames.size(); if (frames_already_walked >= 2) { - StackFrameX86 *last_frame_callee - = static_cast((*stack->frames()) - [frames_already_walked - 2]); + const StackFrameX86 *last_frame_callee + = static_cast(frames[frames_already_walked - 2]); WindowsFrameInfo *last_frame_callee_info = last_frame_callee->windows_frame_info; if (last_frame_callee_info && @@ -157,148 +157,105 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { // Set up the dictionary for the PostfixEvaluator. %ebp and %esp are used // in each program string, and their previous values are known, so set them - // here. .cbCalleeParams is a Breakpad extension that allows us to use - // the PostfixEvaluator engine when certain types of debugging information - // are present without having to write the constants into the program string - // as literals. + // here. PostfixEvaluator::DictionaryType dictionary; + // Provide the current register values. 
dictionary["$ebp"] = last_frame->context.ebp; dictionary["$esp"] = last_frame->context.esp; + // Provide constants from the debug info for last_frame and its callee. + // .cbCalleeParams is a Breakpad extension that allows us to use the + // PostfixEvaluator engine when certain types of debugging information + // are present without having to write the constants into the program + // string as literals. dictionary[".cbCalleeParams"] = last_frame_callee_parameter_size; + dictionary[".cbSavedRegs"] = last_frame_info->saved_register_size; + dictionary[".cbLocals"] = last_frame_info->local_size; + dictionary[".raSearchStart"] = last_frame->context.esp + + last_frame_callee_parameter_size + + last_frame_info->local_size + + last_frame_info->saved_register_size; + dictionary[".cbParams"] = last_frame_info->parameter_size; - if (last_frame_info && last_frame_info->valid == WindowsFrameInfo::VALID_ALL) { - // FPO debugging data is available. Initialize constants. - dictionary[".cbSavedRegs"] = last_frame_info->saved_register_size; - dictionary[".cbLocals"] = last_frame_info->local_size; - dictionary[".raSearchStart"] = last_frame->context.esp + - last_frame_callee_parameter_size + - last_frame_info->local_size + - last_frame_info->saved_register_size; - } - if (last_frame_info && - last_frame_info->valid & WindowsFrameInfo::VALID_PARAMETER_SIZE) { - // This is treated separately because it can either come from FPO data or - // from other debugging data. - dictionary[".cbParams"] = last_frame_info->parameter_size; - } - - // Decide what type of program string to use. The program string is in + // Decide what type of program string to use. The program string is in // postfix notation and will be passed to PostfixEvaluator::Evaluate. // Given the dictionary and the program string, it is possible to compute // the return address and the values of other registers in the calling - // function. 
When encountering a nontraditional frame (one which takes - // advantage of FPO), the stack may need to be scanned for these values. - // For traditional frames, simple deterministic dereferencing suffices - // without any need for scanning. The results of program string evaluation + // function. Because of bugs described below, the stack may need to be + // scanned for these values. The results of program string evaluation // will be used to determine whether to scan for better values. string program_string; - bool traditional_frame = true; bool recover_ebp = true; - if (last_frame_info && last_frame_info->valid == WindowsFrameInfo::VALID_ALL) { - // FPO data available. - traditional_frame = false; - trust = StackFrameX86::FRAME_TRUST_CFI; - if (!last_frame_info->program_string.empty()) { - // The FPO data has its own program string, which will tell us how to - // get to the caller frame, and may even fill in the values of - // nonvolatile registers and provide pointers to local variables and - // parameters. In some cases, particularly with program strings that use - // .raSearchStart, the stack may need to be scanned afterward. - program_string = last_frame_info->program_string; - } else if (last_frame_info->allocates_base_pointer) { - // The function corresponding to the last frame doesn't use the frame - // pointer for conventional purposes, but it does allocate a new - // frame pointer and use it for its own purposes. Its callee's - // information is still accessed relative to %esp, and the previous - // value of %ebp can be recovered from a location in its stack frame, - // within the saved-register area. - // - // Functions that fall into this category use the %ebp register for - // a purpose other than the frame pointer. They restore the caller's - // %ebp before returning. 
These functions create their stack frame - // after a CALL by decrementing the stack pointer in an amount - // sufficient to store local variables, and then PUSHing saved - // registers onto the stack. Arguments to a callee function, if any, - // are PUSHed after that. Walking up to the caller, therefore, - // can be done solely with calculations relative to the stack pointer - // (%esp). The return address is recovered from the memory location - // above the known sizes of the callee's parameters, saved registers, - // and locals. The caller's stack pointer (the value of %esp when - // the caller executed CALL) is the location immediately above the - // saved return address. The saved value of %ebp to be restored for - // the caller is at a known location in the saved-register area of - // the stack frame. - // - // For this type of frame, MSVC 14 (from Visual Studio 8/2005) in - // link-time code generation mode (/LTCG and /GL) can generate erroneous - // debugging data. The reported size of saved registers can be 0, - // which is clearly an error because these frames must, at the very - // least, save %ebp. For this reason, in addition to those given above - // about the use of .raSearchStart, the stack may need to be scanned - // for a better return address and a better frame pointer after the - // program string is evaluated. - // - // %eip_new = *(%esp_old + callee_params + saved_regs + locals) - // %ebp_new = *(%esp_old + callee_params + saved_regs - 8) - // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 - program_string = "$eip .raSearchStart ^ = " - "$ebp $esp .cbCalleeParams + .cbSavedRegs + 8 - ^ = " - "$esp .raSearchStart 4 + ="; - } else { - // The function corresponding to the last frame doesn't use %ebp at - // all. The callee frame is located relative to %esp. 
- // - // The called procedure's instruction pointer and stack pointer are - // recovered in the same way as the case above, except that no - // frame pointer (%ebp) is used at all, so it is not saved anywhere - // in the callee's stack frame and does not need to be recovered. - // Because %ebp wasn't used in the callee, whatever value it has - // is the value that it had in the caller, so it can be carried - // straight through without bringing its validity into question. - // - // Because of the use of .raSearchStart, the stack will possibly be - // examined to locate a better return address after program string - // evaluation. The stack will not be examined to locate a saved - // %ebp value, because these frames do not save (or use) %ebp. - // - // %eip_new = *(%esp_old + callee_params + saved_regs + locals) - // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 - // %ebp_new = %ebp_old - program_string = "$eip .raSearchStart ^ = " - "$esp .raSearchStart 4 + ="; - recover_ebp = false; - } + + trust = StackFrameX86::FRAME_TRUST_CFI; + if (!last_frame_info->program_string.empty()) { + // The FPO data has its own program string, which will tell us how to + // get to the caller frame, and may even fill in the values of + // nonvolatile registers and provide pointers to local variables and + // parameters. In some cases, particularly with program strings that use + // .raSearchStart, the stack may need to be scanned afterward. + program_string = last_frame_info->program_string; + } else if (last_frame_info->allocates_base_pointer) { + // The function corresponding to the last frame doesn't use the frame + // pointer for conventional purposes, but it does allocate a new + // frame pointer and use it for its own purposes. Its callee's + // information is still accessed relative to %esp, and the previous + // value of %ebp can be recovered from a location in its stack frame, + // within the saved-register area. 
+ // + // Functions that fall into this category use the %ebp register for + // a purpose other than the frame pointer. They restore the caller's + // %ebp before returning. These functions create their stack frame + // after a CALL by decrementing the stack pointer in an amount + // sufficient to store local variables, and then PUSHing saved + // registers onto the stack. Arguments to a callee function, if any, + // are PUSHed after that. Walking up to the caller, therefore, + // can be done solely with calculations relative to the stack pointer + // (%esp). The return address is recovered from the memory location + // above the known sizes of the callee's parameters, saved registers, + // and locals. The caller's stack pointer (the value of %esp when + // the caller executed CALL) is the location immediately above the + // saved return address. The saved value of %ebp to be restored for + // the caller is at a known location in the saved-register area of + // the stack frame. + // + // For this type of frame, MSVC 14 (from Visual Studio 8/2005) in + // link-time code generation mode (/LTCG and /GL) can generate erroneous + // debugging data. The reported size of saved registers can be 0, + // which is clearly an error because these frames must, at the very + // least, save %ebp. For this reason, in addition to those given above + // about the use of .raSearchStart, the stack may need to be scanned + // for a better return address and a better frame pointer after the + // program string is evaluated. + // + // %eip_new = *(%esp_old + callee_params + saved_regs + locals) + // %ebp_new = *(%esp_old + callee_params + saved_regs - 8) + // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 + program_string = "$eip .raSearchStart ^ = " + "$ebp $esp .cbCalleeParams + .cbSavedRegs + 8 - ^ = " + "$esp .raSearchStart 4 + ="; } else { - // No FPO information is available for the last frame. Assume that the - // standard %ebp-using x86 calling convention is in use. 
+ // The function corresponding to the last frame doesn't use %ebp at + // all. The callee frame is located relative to %esp. // - // The typical x86 calling convention, when frame pointers are present, - // is for the calling procedure to use CALL, which pushes the return - // address onto the stack and sets the instruction pointer (%eip) to - // the entry point of the called routine. The called routine then - // PUSHes the calling routine's frame pointer (%ebp) onto the stack - // before copying the stack pointer (%esp) to the frame pointer (%ebp). - // Therefore, the calling procedure's frame pointer is always available - // by dereferencing the called procedure's frame pointer, and the return - // address is always available at the memory location immediately above - // the address pointed to by the called procedure's frame pointer. The - // calling procedure's stack pointer (%esp) is 8 higher than the value - // of the called procedure's frame pointer at the time the calling - // procedure made the CALL: 4 bytes for the return address pushed by the - // CALL itself, and 4 bytes for the callee's PUSH of the caller's frame - // pointer. + // The called procedure's instruction pointer and stack pointer are + // recovered in the same way as the case above, except that no + // frame pointer (%ebp) is used at all, so it is not saved anywhere + // in the callee's stack frame and does not need to be recovered. + // Because %ebp wasn't used in the callee, whatever value it has + // is the value that it had in the caller, so it can be carried + // straight through without bringing its validity into question. // - // Instruction and frame pointer recovery for these traditional frames is - // entirely deterministic, and the stack will not be scanned after - // recovering these values. + // Because of the use of .raSearchStart, the stack will possibly be + // examined to locate a better return address after program string + // evaluation. 
The stack will not be examined to locate a saved + // %ebp value, because these frames do not save (or use) %ebp. // - // %eip_new = *(%ebp_old + 4) - // %esp_new = %ebp_old + 8 - // %ebp_new = *(%ebp_old) - trust = StackFrameX86::FRAME_TRUST_FP; - program_string = "$eip $ebp 4 + ^ = " - "$esp $ebp 8 + = " - "$ebp $ebp ^ ="; + // %eip_new = *(%esp_old + callee_params + saved_regs + locals) + // %esp_new = %esp_old + callee_params + saved_regs + locals + 4 + // %ebp_new = %ebp_old + program_string = "$eip .raSearchStart ^ = " + "$esp .raSearchStart 4 + ="; + recover_ebp = false; } // Now crank it out, making sure that the program string set at least the @@ -331,15 +288,14 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { trust = StackFrameX86::FRAME_TRUST_SCAN; } - // If this stack frame did not use %ebp in a traditional way, locating the - // return address isn't entirely deterministic. In that case, the stack - // can be scanned to locate the return address. + // Since this stack frame did not use %ebp in a traditional way, + // locating the return address isn't entirely deterministic. In that + // case, the stack can be scanned to locate the return address. // - // Even in nontraditional frames, if program string evaluation resulted in - // both %eip and %ebp values of 0, trust that the end of the stack has been + // However, if program string evaluation resulted in both %eip and + // %ebp values of 0, trust that the end of the stack has been // reached and don't scan for anything else. - if (!traditional_frame && - (dictionary["$eip"] != 0 || dictionary["$ebp"] != 0)) { + if (dictionary["$eip"] != 0 || dictionary["$ebp"] != 0) { int offset = 0; // This scan can only be done if a CodeModules object is available, to @@ -401,13 +357,6 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { } } - // Treat an instruction address of 0 as end-of-stack. 
Treat incorrect stack - // direction as end-of-stack to enforce progress and avoid infinite loops. - if (dictionary["$eip"] == 0 || - dictionary["$esp"] <= last_frame->context.esp) { - return NULL; - } - // Create a new stack frame (ownership will be transferred to the caller) // and fill it in. StackFrameX86 *frame = new StackFrameX86(); @@ -436,19 +385,129 @@ StackFrame* StackwalkerX86::GetCallerFrame(const CallStack *stack) { frame->context_validity |= StackFrameX86::CONTEXT_VALID_EDI; } - // frame->context.eip is the return address, which is one instruction - // past the CALL that caused us to arrive at the callee. Set - // frame->instruction to one less than that. This won't reference the - // beginning of the CALL instruction, but it's guaranteed to be within the - // CALL, which is sufficient to get the source line information to match up - // with the line that contains a function call. Callers that require the - // exact return address value may access the context.eip field of - // StackFrameX86. - frame->instruction = frame->context.eip - 1; + return frame; +} + +StackFrameX86 *StackwalkerX86::GetCallerByEBPAtBase( + const vector &frames) { + StackFrameX86::FrameTrust trust; + StackFrameX86 *last_frame = static_cast(frames.back()); + u_int32_t last_esp = last_frame->context.esp; + u_int32_t last_ebp = last_frame->context.ebp; + + // Assume that the standard %ebp-using x86 calling convention is in + // use. + // + // The typical x86 calling convention, when frame pointers are present, + // is for the calling procedure to use CALL, which pushes the return + // address onto the stack and sets the instruction pointer (%eip) to + // the entry point of the called routine. The called routine then + // PUSHes the calling routine's frame pointer (%ebp) onto the stack + // before copying the stack pointer (%esp) to the frame pointer (%ebp). 
+ // Therefore, the calling procedure's frame pointer is always available + // by dereferencing the called procedure's frame pointer, and the return + // address is always available at the memory location immediately above + // the address pointed to by the called procedure's frame pointer. The + // calling procedure's stack pointer (%esp) is 8 higher than the value + // of the called procedure's frame pointer at the time the calling + // procedure made the CALL: 4 bytes for the return address pushed by the + // CALL itself, and 4 bytes for the callee's PUSH of the caller's frame + // pointer. + // + // %eip_new = *(%ebp_old + 4) + // %esp_new = %ebp_old + 8 + // %ebp_new = *(%ebp_old) + + u_int32_t caller_eip, caller_esp, caller_ebp; + + if (memory_->GetMemoryAtAddress(last_ebp + 4, &caller_eip) && + memory_->GetMemoryAtAddress(last_ebp, &caller_ebp)) { + caller_esp = last_ebp + 8; + trust = StackFrameX86::FRAME_TRUST_FP; + } else { + // We couldn't read the memory %ebp refers to. It may be that %ebp + // is pointing to non-stack memory. We'll scan the stack for a + // return address. This can happen if last_frame is executing code + // for a module for which we don't have symbols, and that module + // is compiled without a frame pointer. + if (!ScanForReturnAddress(last_esp, &caller_esp, &caller_eip)) { + // If we can't find an instruction pointer even with stack scanning, + // give up and return a null frame. + return NULL; + } + + // ScanForReturnAddress found a reasonable return address. Advance + // %esp to the location above the one where the return address was + // found. Assume that %ebp is unchanged. + caller_esp += 4; + caller_ebp = last_ebp; + + trust = StackFrameX86::FRAME_TRUST_SCAN; + } + + // Create a new stack frame (ownership will be transferred to the caller) + // and fill it in.
+ StackFrameX86 *frame = new StackFrameX86(); + + frame->trust = trust; + frame->context = last_frame->context; + frame->context.eip = caller_eip; + frame->context.esp = caller_esp; + frame->context.ebp = caller_ebp; + frame->context_validity = StackFrameX86::CONTEXT_VALID_EIP | + StackFrameX86::CONTEXT_VALID_ESP | + StackFrameX86::CONTEXT_VALID_EBP; return frame; } +StackFrame *StackwalkerX86::GetCallerFrame(const CallStack *stack) { + if (!memory_ || !stack) { + BPLOG(ERROR) << "Can't get caller frame without memory or stack"; + return NULL; + } + + const vector &frames = *stack->frames(); + StackFrameX86 *last_frame = static_cast(frames.back()); + scoped_ptr new_frame; + + // If we have Windows stack walking information, use that. + WindowsFrameInfo *windows_frame_info + = resolver_->FindWindowsFrameInfo(last_frame); + if (windows_frame_info) + new_frame.reset(GetCallerByWindowsFrameInfo(frames, windows_frame_info)); + + // Otherwise, hope that we're using a traditional frame structure. + if (!new_frame.get()) + new_frame.reset(GetCallerByEBPAtBase(frames)); + + // If nothing worked, tell the caller. + if (!new_frame.get()) + return NULL; + + // Treat an instruction address of 0 as end-of-stack. + if (new_frame->context.eip == 0) + return NULL; + + // If the new stack pointer is at a lower address than the old, then + // that's clearly incorrect. Treat this as end-of-stack to enforce + // progress and avoid infinite loops. + if (new_frame->context.esp <= last_frame->context.esp) + return NULL; + + // new_frame->context.eip is the return address, which is one instruction + // past the CALL that caused us to arrive at the callee. Set + // new_frame->instruction to one less than that. This won't reference the + // beginning of the CALL instruction, but it's guaranteed to be within + // the CALL, which is sufficient to get the source line information to + // match up with the line that contains a function call. 
Callers that + // require the exact return address value may access the context.eip + // field of StackFrameX86. + new_frame->instruction = new_frame->context.eip - 1; + + return new_frame.release(); +} + bool StackwalkerX86::ScanForReturnAddress(u_int32_t location_start, u_int32_t *location_found, u_int32_t *eip_found) { diff --git a/src/processor/stackwalker_x86.h b/src/processor/stackwalker_x86.h index 255c16b3..b4d134ea 100644 --- a/src/processor/stackwalker_x86.h +++ b/src/processor/stackwalker_x86.h @@ -1,3 +1,5 @@ +// -*- mode: c++ -*- + // Copyright (c) 2006, Google Inc. // All rights reserved. // @@ -42,6 +44,7 @@ #include "google_breakpad/common/breakpad_types.h" #include "google_breakpad/common/minidump_format.h" #include "google_breakpad/processor/stackwalker.h" +#include "google_breakpad/processor/stack_frame_cpu.h" namespace google_breakpad { @@ -66,8 +69,22 @@ class StackwalkerX86 : public Stackwalker { // stack conventions (saved %ebp at [%ebp], saved %eip at 4[%ebp], or // alternate conventions as guided by any WindowsFrameInfo available for the // code in question.). - virtual StackFrame* GetContextFrame(); - virtual StackFrame* GetCallerFrame(const CallStack *stack); + virtual StackFrame *GetContextFrame(); + virtual StackFrame *GetCallerFrame(const CallStack *stack); + + // Use windows_frame_info (derived from STACK WIN and FUNC records) + // to construct the frame that called frames.back(). The caller + // takes ownership of the returned frame. Return NULL on failure. + StackFrameX86 *GetCallerByWindowsFrameInfo( + const vector &frames, + WindowsFrameInfo *windows_frame_info); + + // Assuming a traditional frame layout --- where the caller's %ebp + // has been pushed just after the return address and the callee's + // %ebp points to the saved %ebp --- construct the frame that called + // frames.back(). The caller takes ownership of the returned frame. + // Return NULL on failure. 
+ StackFrameX86 *GetCallerByEBPAtBase(const vector &frames); // Scan the stack starting at location_start, looking for an address // that looks like a valid instruction pointer. Addresses must