Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5955,9 +5955,15 @@ void MacroAssembler::remove_frame(int framesize) {

void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
if (needs_stack_repair) {
// Remove the extension of the caller's frame used for inline type unpacking
// The method has a scalarized entry point (where fields of value object arguments
// are passed through registers and stack), and a non-scalarized entry point (where
// value object arguments are given as oops). The non-scalarized entry point will
// first load each field of value object arguments and store them in registers and on
// the stack in a way compatible with the scalarized entry point. To do so, some extra
// stack space might be reserved (if argument registers are not enough). On leaving the
// method, this space must be freed.
//
// Right now the stack looks like this:
// In case we used the non-scalarized entry point the stack looks like this:
//
// | Arguments from caller |
// |---------------------------| <-- caller's SP
Expand All @@ -5983,18 +5989,28 @@ void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair
// will fix only this one. Overall, FP/LR #2 are not reliable and are simply
// needed to add space between the extension space and the locals, as there
// would be between the real arguments and the locals if we don't need to
// do unpacking.
// do unpacking (from the scalarized entry point).
//
// When restoring, one must then load FP #1 into x29, and LR #1 into x30,
// while keeping in mind that from the scalarized entry point, there will be
// only one copy of each.
// only one copy of each. Indeed, in the case we used the scalarized calling
// convention, the stack looks like this:
//
// | Arguments from caller |
// |---------------------------| <-- caller's SP / start of this method's frame
// | Saved LR |
// | Saved FP |
// |---------------------------| <-- FP
// | sp_inc |
// | method locals |
// |---------------------------| <-- SP
//
// The sp_inc stack slot holds the total size of the frame including the
// extension space minus two words for the saved FP and LR. That is how to
// find FP/LR #1. This size is expressed in bytes. Be careful when using it
// from C++ in pointer arithmetic; you might need to divide it by wordSize.
//
// TODO 8371993 store fake values instead of LR/FP#2
// One can find sp_inc since the start the method's frame is SP + initial_framesize.

int sp_inc_offset = initial_framesize - 3 * wordSize; // Immediately below saved LR and FP

Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
#endif

// C2 compiled method's prolog code.
// Beware! This sp_inc is NOT the same as the one mentioned in MacroAssembler::remove_frame but only the size
// of the extension space + the additional copy of the return address. That means, it doesn't contain the
// frame size (where the local and sp_inc are) and the saved RBP.
void C2_MacroAssembler::verified_entry(Compile* C, int sp_inc) {
if (C->clinit_barrier_on_entry()) {
assert(VM_Version::supports_fast_class_init_checks(), "sanity");
Expand Down
25 changes: 25 additions & 0 deletions src/hotspot/cpu/x86/frame_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,31 @@ intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr)
return sender_sp;
}


// See comment in MacroAssembler::remove_frame
frame::CompiledFramePointers frame::compiled_frame_details() const {
// frame owned by optimizing compiler
assert(_cb->frame_size() > 0, "must have non-zero frame size");
intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
assert(sender_sp == real_fp(), "");

// This is the saved value of EBP which may or may not really be an FP.
// It is only an FP if the sender is an interpreter frame (or C1?).
// saved_fp_addr should be correct even for a bottom thawed frame (with a return barrier)
intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);

// Repair the sender sp if the frame has been extended
sender_sp = repair_sender_sp(sender_sp, saved_fp_addr);

CompiledFramePointers cfp;
cfp.sender_sp = sender_sp;
cfp.saved_fp_addr = saved_fp_addr;
// On Intel the return_address is always the word on the stack
cfp.sender_pc_addr = (address*)(sender_sp - frame::return_addr_offset);

return cfp;
}

intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) {
assert(nm != nullptr && nm->needs_stack_repair(), "");
// The stack increment resides just below the saved rbp on the stack
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/frame_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@
public:
// Support for scalarized inline type calling convention
intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const;
struct CompiledFramePointers {
intptr_t* sender_sp; // The top of the stack of the sender
intptr_t** saved_fp_addr; // Where RBP is saved on the stack
address* sender_pc_addr; // Where return address (copy #1 in remove_frame's comment) is saved on the stack
};
CompiledFramePointers compiled_frame_details() const;
static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr);
bool was_augmented_on_entry(int& real_size) const;

Expand Down
43 changes: 7 additions & 36 deletions src/hotspot/cpu/x86/frame_x86.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,36 +426,7 @@ inline frame frame::sender_raw(RegisterMap* map) const {

inline frame frame::sender_for_compiled_frame(RegisterMap* map) const {
assert(map != nullptr, "map must be set");

// frame owned by optimizing compiler
assert(_cb->frame_size() > 0, "must have non-zero frame size");
intptr_t* sender_sp = unextended_sp() + _cb->frame_size();
assert(sender_sp == real_fp(), "");

#ifdef ASSERT
address sender_pc_copy = (address) *(sender_sp-1);
#endif

// This is the saved value of EBP which may or may not really be an FP.
// It is only an FP if the sender is an interpreter frame (or C1?).
// saved_fp_addr should be correct even for a bottom thawed frame (with a return barrier)
intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);

// Repair the sender sp if the frame has been extended
sender_sp = repair_sender_sp(sender_sp, saved_fp_addr);

// On Intel the return_address is always the word on the stack
address sender_pc = (address) *(sender_sp-1);

#ifdef ASSERT
if (sender_pc != sender_pc_copy) {
// When extending the stack in the callee method entry to make room for unpacking of value
// type args, we keep a copy of the sender pc at the expected location in the callee frame.
// If the sender pc is patched due to deoptimization, the copy is not consistent anymore.
nmethod* nm = CodeCache::find_blob(sender_pc)->as_nmethod();
assert(sender_pc == nm->deopt_handler_entry(), "unexpected sender pc");
}
#endif
CompiledFramePointers cfp = compiled_frame_details();

if (map->update_map()) {
// Tell GC to use argument oopmaps for some runtime stubs that need it.
Expand Down Expand Up @@ -494,21 +465,21 @@ inline frame frame::sender_for_compiled_frame(RegisterMap* map) const {
// Since the prolog does the save and restore of EBP there is no oopmap
// for it so we must fill in its location as if there was an oopmap entry
// since if our caller was compiled code there could be live jvm state in it.
update_map_with_saved_link(map, saved_fp_addr);
update_map_with_saved_link(map, cfp.saved_fp_addr);
}

assert(sender_sp != sp(), "must have changed");
assert(cfp.sender_sp != sp(), "must have changed");

if (Continuation::is_return_barrier_entry(sender_pc)) {
if (Continuation::is_return_barrier_entry(*cfp.sender_pc_addr)) {
if (map->walk_cont()) { // about to walk into an h-stack
return Continuation::top_frame(*this, map);
} else {
return Continuation::continuation_bottom_sender(map->thread(), *this, sender_sp);
return Continuation::continuation_bottom_sender(map->thread(), *this, cfp.sender_sp);
}
}

intptr_t* unextended_sp = sender_sp;
return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
intptr_t* unextended_sp = cfp.sender_sp;
return frame(cfp.sender_sp, unextended_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr);
}

template <typename RegisterMapT>
Expand Down
67 changes: 65 additions & 2 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6065,7 +6065,11 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re
}

// Calculate the extra stack space required for packing or unpacking inline
// args and adjust the stack pointer
// args and adjust the stack pointer.
//
// This extra stack space take into account the copy #2 of the return address,
// but NOT the saved RBP or the normal size of the frame (see MacroAssembler::remove_frame
// for notations).
int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
// Two additional slots to account for return address
int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size;
Expand All @@ -6076,7 +6080,13 @@ int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) {
assert(sp_inc > 0, "sanity");
pop(r13);
subptr(rsp, sp_inc);
#ifdef ASSERT
movl(Address(rsp, -VMRegImpl::stack_slot_size), badRegWordVal);
movl(Address(rsp, -2 * VMRegImpl::stack_slot_size), badRegWordVal);
subptr(rsp, 2 * VMRegImpl::stack_slot_size);
#else
push(r13);
#endif
return sp_inc;
}

Expand Down Expand Up @@ -6311,7 +6321,60 @@ VMReg MacroAssembler::spill_reg_for(VMReg reg) {
void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) {
assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
if (needs_stack_repair) {
// TODO 8284443 Add a comment drawing the frame like in Aarch64's version of MacroAssembler::remove_frame
// The method has a scalarized entry point (where fields of value object arguments
// are passed through registers and stack), and a non-scalarized entry point (where
// value object arguments are given as oops). The non-scalarized entry point will
// first load each field of value object arguments and store them in registers and on
// the stack in a way compatible with the scalarized entry point. To do so, some extra
// stack space might be reserved (if argument registers are not enough). On leaving the
// method, this space must be freed.
//
// In case we used the non-scalarized entry point the stack looks like this:
//
// | Arguments from caller |
// |---------------------------| <-- caller's SP
// | Return address #1 |
// |---------------------------|
// | Extension space for |
// | inline arg (un)packing |
// |---------------------------|
// | Return address #2 |
// | Saved RBP |
// |---------------------------| <-- start of this method's frame
// | sp_inc |
// | method locals |
// |---------------------------| <-- SP
//
// There is two copies of the return address on the stack. They will be identical at
// first, but that can change.
// If the caller has been deoptimized, the copy #1 will be patched to point at the
// deopt blob, and the copy #2 will still point into the old method. In short
// the copy #2 is not reliable and should not be used. It is mostly needed to
// add space between the extension space and the locals, as there would be between
// the real arguments and the locals if we don't need to do unpacking (from the
// scalarized entry point).
//
// When leaving, one must use the copy #1 of the return address, while keeping in mind
// that from the scalarized entry point, there will be only one copy. Indeed, in the
// case we used the scalarized calling convention, the stack looks like this:
//
// | Arguments from caller |
// |---------------------------| <-- caller's SP
// | Return address |
// | Saved RBP |
// |---------------------------| <-- start of this method's frame
// | sp_inc |
// | method locals |
// |---------------------------| <-- SP
//
// The sp_inc stack slot holds the total size of the frame, including the extension
// space the possible copy #2 of the return address and the saved RBP (but never the
// copy #1 of the return address). That is how to find the copy #1 of the return address.
// This size is expressed in bytes. Be careful when using it from C++ in pointer arithmetic;
// you might need to divide it by wordSize.
//
// One can find sp_inc since the start the method's frame is SP + initial_framesize.

movq(rbp, Address(rsp, initial_framesize));
// The stack increment resides just below the saved rbp
addq(rsp, Address(rsp, initial_framesize - wordSize));
Expand Down
4 changes: 2 additions & 2 deletions test/jdk/ProblemList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,8 @@ java/lang/invoke/VarHandles/VarHandleTestMethodHandleAccessValue.java 8367346 ge
jdk/classfile/AccessFlagsTest.java 8366270 generic-all
jdk/jfr/event/runtime/TestClassLoaderStatsEvent.java 8366820 generic-all

jdk/internal/vm/Continuation/Fuzz.java#default 8370177 generic-aarch64
jdk/internal/vm/Continuation/Fuzz.java#preserve-fp 8370177 generic-aarch64
jdk/internal/vm/Continuation/Fuzz.java#default 8370177 generic-aarch64,generic-x64
jdk/internal/vm/Continuation/Fuzz.java#preserve-fp 8370177 generic-aarch64,generic-x64

sun/tools/jhsdb/BasicLauncherTest.java 8366806 generic-all
sun/tools/jhsdb/HeapDumpTest.java 8366806 generic-all
Expand Down