1- ;
2- ; stack switching code for MASM on x64
3- ; Kristjan Valur Jonsson, apr 2011
4- ; Modified for stackman, dec 2019
5- ; Added stackman_call, dec 2020
6- ;
7-
8- include macamd64. inc
9-
; pop_reg Reg
; Epilogue helper: pops one 64-bit register off the stack. It expands to
; a bare `pop` and is used below to restore, in reverse order, the
; registers that the prologue stored with push_reg.
pop_reg macro Reg
        pop     Reg
endm
13-
; load_xmm128 Reg, Disp
; Epilogue helper: reloads the XMM register Reg from the stack slot at
; [rsp + Disp] using an aligned 128-bit move (movdqa), so the slot must be
; 16-byte aligned. Used below to restore what save_xmm128 stored.
load_xmm128 MACRO Reg, Disp
        movdqa  Reg, [rsp + Disp]
ENDM
17-
18- .code
19-
;-----------------------------------------------------------------------
; stackman_switch
; ABI:  Microsoft x64
; In:   rcx = callback, rdx = context
; Out:  rax = value returned by the second (restore) callback call
;
; Saves every register the x64 ABI specifies as non-volatile
; (xmm6-xmm15, r12-r15, rbp, rbx, rdi, rsi), then calls
;       callback(context, STACKMAN_OP_SAVE, sp)
; and moves rsp to the stack pointer that call returns. It then calls
;       callback(context, STACKMAN_OP_RESTORE, new_sp)
; and restores the non-volatile registers from the stack that is
; current at that point.
;-----------------------------------------------------------------------
STACKMAN_OP_SAVE        equ 0           ; opcode for the first callback call
STACKMAN_OP_RESTORE     equ 1           ; opcode for the second callback call
SHADOW_SPACE            equ 20h         ; 32 bytes of argument home space for the callee

NESTED_ENTRY stackman_switch, _TEXT$00
        ; Save the non-volatile XMM (SSE) registers. May not always be
        ; necessary, but better safe than sorry.
        alloc_stack 168                 ; 10 * 16 bytes, plus 8 to make rsp 16-byte aligned
        save_xmm128 xmm15, 144
        save_xmm128 xmm14, 128
        save_xmm128 xmm13, 112
        save_xmm128 xmm12, 96
        save_xmm128 xmm11, 80
        save_xmm128 xmm10, 64
        save_xmm128 xmm9, 48
        save_xmm128 xmm8, 32
        save_xmm128 xmm7, 16
        save_xmm128 xmm6, 0

        ; Save the non-volatile integer registers.
        push_reg r15
        push_reg r14
        push_reg r13
        push_reg r12

        push_reg rbp
        push_reg rbx
        push_reg rdi
        push_reg rsi

        sub     rsp, SHADOW_SPACE       ; shadow space for the callback's arguments
        .allocstack SHADOW_SPACE
        .endprolog

        ; Keep the arguments in non-volatile registers so they survive
        ; both callback invocations.
        mov     r12, rcx                ; r12 = callback
        mov     r13, rdx                ; r13 = context

        ; First call: report the stack pointer being saved. The shadow
        ; space is excluded because the callee is free to clobber it.
        mov     rcx, r13                ; arg1 = context
        mov     edx, STACKMAN_OP_SAVE   ; arg2 = opcode (32-bit write zero-extends into rdx)
        lea     r8, [rsp + SHADOW_SPACE] ; arg3 = stack pointer without the shadow space
        mov     rbx, rsp                ; rbx = old rsp, needed for the rbp fix-up below
        call    r12

        ; Actual stack switch: rax is the new stack pointer; re-create the
        ; shadow space beneath it.
        lea     rsp, [rax - SHADOW_SPACE]

        ; Shift rbp by the same distance rsp moved. rbp is reloaded from
        ; the new stack in the epilogue, so this adjusted value is only
        ; live during the second callback call.
        sub     rbx, rsp                ; rbx = old rsp - new rsp
        sub     rbp, rbx

        ; Second call: report the stack pointer that is now in effect.
        mov     rcx, r13                ; arg1 = context
        mov     edx, STACKMAN_OP_RESTORE ; arg2 = opcode
        mov     r8, rax                 ; arg3 = new stack pointer
        call    r12
        ; rax from this call is left untouched and becomes the return value.

        ; Restore the non-volatile registers in reverse order of the prologue.
        ; NOTE(review): Microsoft documents the legal x64 epilog as
        ; `add rsp, const` (or `lea rsp, ...`) followed only by pops and
        ; ret. The XMM reloads below sit between the pops and the final
        ; `add rsp`, so the unwinder may not treat this sequence as an
        ; epilog. Left as-is because fixing it changes the frame layout;
        ; confirm whether asynchronous unwinding matters here.
        add     rsp, SHADOW_SPACE
        pop_reg rsi
        pop_reg rdi
        pop_reg rbx
        pop_reg rbp

        pop_reg r12
        pop_reg r13
        pop_reg r14
        pop_reg r15

        load_xmm128 xmm15, 144
        load_xmm128 xmm14, 128
        load_xmm128 xmm13, 112
        load_xmm128 xmm12, 96
        load_xmm128 xmm11, 80
        load_xmm128 xmm10, 64
        load_xmm128 xmm9, 48
        load_xmm128 xmm8, 32
        load_xmm128 xmm7, 16
        load_xmm128 xmm6, 0
        add     rsp, 168                ; release the XMM save area
        ret

NESTED_END stackman_switch, _TEXT$00
102-
103- ; based on template from https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-160
;-----------------------------------------------------------------------
; stackman_call
; ABI:  Microsoft x64
; In:   rcx = callback, rdx = context, r8 = stack pointer to run on
; Out:  rax is whatever the callback leaves in it
;
; Calls callback(context, 2, old_sp) with rsp moved to the supplied stack
; pointer minus 32 bytes of parameter space, then returns on the original
; stack, which is recovered through the rbp frame.
;-----------------------------------------------------------------------
stackman_call PROC FRAME
        push    rbp
        .pushreg rbp
        ; After the push the stack is 16-byte aligned, so no additional
        ; local space is allocated.
        lea     rbp, [rsp + 00h]
        .setframe rbp, 00h
        .endprolog

        ; Shuffle the incoming arguments into volatile registers.
        mov     rax, rcx                ; rax = callback
        mov     r9, r8                  ; r9  = requested stack pointer
        mov     rcx, rdx                ; arg1 = context

        ; Remaining arguments for the call.
        mov     edx, 2                  ; arg2 = opcode 2
        mov     r8, rsp                 ; arg3 = stack pointer before the switch

        ; Switch to the requested stack, pre-allocating the 32 bytes of
        ; parameter stack for the callee, then call.
        lea     rsp, [r9 - 32]
        call    rax

        ; Official epilog: rbp is non-volatile, so it still addresses the
        ; original frame.
        lea     rsp, [rbp + 0h]
        pop     rbp
        ret     0
stackman_call ENDP
134-
1+ ;
2+ ; stack switching code for MASM on x64
3+ ; Kristjan Valur Jonsson, apr 2011
4+ ; Modified for stackman, dec 2019
5+ ; Added stackman_call, dec 2020
6+ ;
7+
8+ include macamd64. inc
9+
; pop_reg Reg
; Epilogue helper: pops one 64-bit register off the stack. It expands to
; a bare `pop` and is used below to restore, in reverse order, the
; registers that the prologue stored with push_reg.
pop_reg macro Reg
        pop     Reg
endm
13+
; load_xmm128 Reg, Disp
; Epilogue helper: reloads the XMM register Reg from the stack slot at
; [rsp + Disp] using an aligned 128-bit move (movdqa), so the slot must be
; 16-byte aligned. Used below to restore what save_xmm128 stored.
load_xmm128 MACRO Reg, Disp
        movdqa  Reg, [rsp + Disp]
ENDM
17+
18+ .code
19+
;-----------------------------------------------------------------------
; stackman_switch
; ABI:  Microsoft x64
; In:   rcx = callback, rdx = context
; Out:  rax = value returned by the second (restore) callback call
;
; Saves every register the x64 ABI specifies as non-volatile
; (xmm6-xmm15, r12-r15, rbp, rbx, rdi, rsi), then calls
;       callback(context, STACKMAN_OP_SAVE, sp)
; and moves rsp to the stack pointer that call returns. It then calls
;       callback(context, STACKMAN_OP_RESTORE, new_sp)
; and restores the non-volatile registers from the stack that is
; current at that point.
;-----------------------------------------------------------------------
STACKMAN_OP_SAVE        equ 0           ; opcode for the first callback call
STACKMAN_OP_RESTORE     equ 1           ; opcode for the second callback call
SHADOW_SPACE            equ 20h         ; 32 bytes of argument home space for the callee

NESTED_ENTRY stackman_switch, _TEXT$00
        ; Save the non-volatile XMM (SSE) registers. May not always be
        ; necessary, but better safe than sorry.
        alloc_stack 168                 ; 10 * 16 bytes, plus 8 to make rsp 16-byte aligned
        save_xmm128 xmm15, 144
        save_xmm128 xmm14, 128
        save_xmm128 xmm13, 112
        save_xmm128 xmm12, 96
        save_xmm128 xmm11, 80
        save_xmm128 xmm10, 64
        save_xmm128 xmm9, 48
        save_xmm128 xmm8, 32
        save_xmm128 xmm7, 16
        save_xmm128 xmm6, 0

        ; Save the non-volatile integer registers.
        push_reg r15
        push_reg r14
        push_reg r13
        push_reg r12

        push_reg rbp
        push_reg rbx
        push_reg rdi
        push_reg rsi

        sub     rsp, SHADOW_SPACE       ; shadow space for the callback's arguments
        .allocstack SHADOW_SPACE
        .endprolog

        ; Keep the arguments in non-volatile registers so they survive
        ; both callback invocations.
        mov     r12, rcx                ; r12 = callback
        mov     r13, rdx                ; r13 = context

        ; First call: report the stack pointer being saved. The shadow
        ; space is excluded because the callee is free to clobber it.
        mov     rcx, r13                ; arg1 = context
        mov     edx, STACKMAN_OP_SAVE   ; arg2 = opcode (32-bit write zero-extends into rdx)
        lea     r8, [rsp + SHADOW_SPACE] ; arg3 = stack pointer without the shadow space
        mov     rbx, rsp                ; rbx = old rsp, needed for the rbp fix-up below
        call    r12

        ; Actual stack switch: rax is the new stack pointer; re-create the
        ; shadow space beneath it.
        lea     rsp, [rax - SHADOW_SPACE]

        ; Shift rbp by the same distance rsp moved. rbp is reloaded from
        ; the new stack in the epilogue, so this adjusted value is only
        ; live during the second callback call.
        sub     rbx, rsp                ; rbx = old rsp - new rsp
        sub     rbp, rbx

        ; Second call: report the stack pointer that is now in effect.
        mov     rcx, r13                ; arg1 = context
        mov     edx, STACKMAN_OP_RESTORE ; arg2 = opcode
        mov     r8, rax                 ; arg3 = new stack pointer
        call    r12
        ; rax from this call is left untouched and becomes the return value.

        ; Restore the non-volatile registers in reverse order of the prologue.
        ; NOTE(review): Microsoft documents the legal x64 epilog as
        ; `add rsp, const` (or `lea rsp, ...`) followed only by pops and
        ; ret. The XMM reloads below sit between the pops and the final
        ; `add rsp`, so the unwinder may not treat this sequence as an
        ; epilog. Left as-is because fixing it changes the frame layout;
        ; confirm whether asynchronous unwinding matters here.
        add     rsp, SHADOW_SPACE
        pop_reg rsi
        pop_reg rdi
        pop_reg rbx
        pop_reg rbp

        pop_reg r12
        pop_reg r13
        pop_reg r14
        pop_reg r15

        load_xmm128 xmm15, 144
        load_xmm128 xmm14, 128
        load_xmm128 xmm13, 112
        load_xmm128 xmm12, 96
        load_xmm128 xmm11, 80
        load_xmm128 xmm10, 64
        load_xmm128 xmm9, 48
        load_xmm128 xmm8, 32
        load_xmm128 xmm7, 16
        load_xmm128 xmm6, 0
        add     rsp, 168                ; release the XMM save area
        ret

NESTED_END stackman_switch, _TEXT$00
102+
103+ ; based on template from https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-160
;-----------------------------------------------------------------------
; stackman_call
; ABI:  Microsoft x64
; In:   rcx = callback, rdx = context, r8 = stack pointer to run on
; Out:  rax is whatever the callback leaves in it
;
; Calls callback(context, 2, old_sp) with rsp moved to the supplied stack
; pointer minus 32 bytes of parameter space, then returns on the original
; stack, which is recovered through the rbp frame.
;-----------------------------------------------------------------------
stackman_call PROC FRAME
        push    rbp
        .pushreg rbp
        ; After the push the stack is 16-byte aligned, so no additional
        ; local space is allocated.
        lea     rbp, [rsp + 00h]
        .setframe rbp, 00h
        .endprolog

        ; Shuffle the incoming arguments into volatile registers.
        mov     rax, rcx                ; rax = callback
        mov     r9, r8                  ; r9  = requested stack pointer
        mov     rcx, rdx                ; arg1 = context

        ; Remaining arguments for the call.
        mov     edx, 2                  ; arg2 = opcode 2
        mov     r8, rsp                 ; arg3 = stack pointer before the switch

        ; Switch to the requested stack, pre-allocating the 32 bytes of
        ; parameter stack for the callee, then call.
        lea     rsp, [r9 - 32]
        call    rax

        ; Official epilog: rbp is non-volatile, so it still addresses the
        ; original frame.
        lea     rsp, [rbp + 0h]
        pop     rbp
        ret     0
stackman_call ENDP
134+
135135END
0 commit comments