Commit f20836a
Tao Chen authored and Kernel Patches Daemon committed
bpf: Hold the perf callchain entry until used completely
As Alexei noted, get_perf_callchain() return values may be reused if a task is preempted after the BPF program enters migrate-disable mode. perf_callchain_entries has only a small stack of entries, and we can reuse it as follows:

1. get the perf callchain entry
2. BPF uses it ...
3. put the perf callchain entry

And Peter suggested that get_recursion_context() be used with preemption disabled, so we should disable preemption on the BPF side.

Acked-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Tao Chen <chen.dylane@linux.dev>
1 parent f844b26 commit f20836a
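For reference, a minimal sketch of the hold-until-used pattern described in the commit message, built on the kernel's get_callchain_entry()/put_callchain_entry() accessors that the diff below also uses. The helper names hold_callchain_entry()/release_callchain_entry() and the header locations are illustrative assumptions; the helpers actually added by this commit are bpf_get_perf_callchain() and bpf_put_perf_callchain().

/*
 * Illustrative sketch only (not part of the commit): the callchain entry
 * is held across the whole BPF use and released afterwards, and the
 * lookup runs with preemption disabled because get_recursion_context()
 * relies on a stable per-CPU context.
 */
#include <linux/perf_event.h>	/* perf_callchain_entry, get/put_callchain_entry() */
#include <linux/preempt.h>	/* preempt_disable()/preempt_enable() */

static struct perf_callchain_entry *hold_callchain_entry(int *rctx)
{
	struct perf_callchain_entry *entry;

	preempt_disable();
	entry = get_callchain_entry(rctx);	/* step 1: get the entry */
	preempt_enable();

	return entry;				/* NULL if the per-CPU pool is busy */
}

static void release_callchain_entry(int rctx)
{
	put_callchain_entry(rctx);		/* step 3: put it only after use */
}

In the diff below, bpf_get_stackid() follows exactly this pairing: bpf_get_perf_callchain() before __bpf_get_stackid(), and bpf_put_perf_callchain() only after the stack id has been computed.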

File tree

1 file changed: +56 −12 lines


kernel/bpf/stackmap.c

Lines changed: 56 additions & 12 deletions
@@ -210,13 +210,14 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 }
 
 static struct perf_callchain_entry *
-get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
+get_callchain_entry_for_task(int *rctx, struct task_struct *task, u32 max_depth)
 {
 #ifdef CONFIG_STACKTRACE
 	struct perf_callchain_entry *entry;
-	int rctx;
 
-	entry = get_callchain_entry(&rctx);
+	preempt_disable();
+	entry = get_callchain_entry(rctx);
+	preempt_enable();
 
 	if (!entry)
 		return NULL;
@@ -238,8 +239,6 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
 		to[i] = (u64)(from[i]);
 	}
 
-	put_callchain_entry(rctx);
-
 	return entry;
 #else /* CONFIG_STACKTRACE */
 	return NULL;
@@ -320,6 +319,34 @@ static long __bpf_get_stackid(struct bpf_map *map,
 	return id;
 }
 
+static struct perf_callchain_entry *
+bpf_get_perf_callchain(int *rctx, struct pt_regs *regs, bool kernel, bool user,
+		       int max_stack, bool crosstask)
+{
+	struct perf_callchain_entry_ctx ctx;
+	struct perf_callchain_entry *entry;
+
+	preempt_disable();
+	entry = get_callchain_entry(rctx);
+	preempt_enable();
+
+	if (unlikely(!entry))
+		return NULL;
+
+	__init_perf_callchain_ctx(&ctx, entry, max_stack, false);
+	if (kernel)
+		__get_perf_callchain_kernel(&ctx, regs);
+	if (user && !crosstask)
+		__get_perf_callchain_user(&ctx, regs, 0);
+
+	return entry;
+}
+
+static void bpf_put_perf_callchain(int rctx)
+{
+	put_callchain_entry(rctx);
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
@@ -328,20 +355,25 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	struct perf_callchain_entry *trace;
 	bool kernel = !user;
 	u32 max_depth;
+	int rctx, ret;
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
 		return -EINVAL;
 
 	max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
-	trace = get_perf_callchain(regs, kernel, user, max_depth,
-				   false, false, 0);
+
+	trace = bpf_get_perf_callchain(&rctx, regs, kernel, user, max_depth,
+				       false);
 
 	if (unlikely(!trace))
 		/* couldn't fetch the stack trace */
 		return -EFAULT;
 
-	return __bpf_get_stackid(map, trace, flags);
+	ret = __bpf_get_stackid(map, trace, flags);
+	bpf_put_perf_callchain(rctx);
+
+	return ret;
 }
 
 const struct bpf_func_proto bpf_get_stackid_proto = {
@@ -435,6 +467,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	bool kernel = !user;
 	int err = -EINVAL;
 	u64 *ips;
+	int rctx;
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_USER_BUILD_ID)))
@@ -467,18 +500,26 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		trace = trace_in;
 		trace->nr = min_t(u32, trace->nr, max_depth);
 	} else if (kernel && task) {
-		trace = get_callchain_entry_for_task(task, max_depth);
+		trace = get_callchain_entry_for_task(&rctx, task, max_depth);
 	} else {
-		trace = get_perf_callchain(regs, kernel, user, max_depth,
-					   crosstask, false, 0);
+		trace = bpf_get_perf_callchain(&rctx, regs, kernel, user, max_depth,
+					       crosstask);
 	}
 
-	if (unlikely(!trace) || trace->nr < skip) {
+	if (unlikely(!trace)) {
 		if (may_fault)
 			rcu_read_unlock();
 		goto err_fault;
 	}
 
+	if (trace->nr < skip) {
+		if (may_fault)
+			rcu_read_unlock();
+		if (!trace_in)
+			bpf_put_perf_callchain(rctx);
+		goto err_fault;
+	}
+
 	trace_nr = trace->nr - skip;
 	copy_len = trace_nr * elem_size;
 
@@ -497,6 +538,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 	if (may_fault)
 		rcu_read_unlock();
 
+	if (!trace_in)
+		bpf_put_perf_callchain(rctx);
+
 	if (user_build_id)
 		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
 