From 59089a189e3adde4cf85f2ce479738d1ae4c514d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Jun 2021 09:39:15 +0000 Subject: bpf: Remove superfluous aux sanitation on subprog rejection Follow-up to fe9a5ca7e370 ("bpf: Do not mark insn as seen under speculative path verification"). The sanitize_insn_aux_data() helper does not serve a particular purpose in today's code. The original intention for the helper was that if function-by-function verification fails, a given program would be cleared from temporary insn_aux_data[], and then its verification would be re-attempted in the context of the main program a second time. However, a failure in do_check_subprogs() will skip do_check_main() and propagate the error to the user instead, thus such situation can never occur. Given its interaction is not compatible to the Spectre v1 mitigation (due to comparing aux->seen with env->pass_cnt), just remove sanitize_insn_aux_data() to avoid future bugs in this area. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9de3c9c3267c..8a7a28b4cfb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12752,37 +12752,6 @@ static void free_states(struct bpf_verifier_env *env) } } -/* The verifier is using insn_aux_data[] to store temporary data during - * verification and to store information for passes that run after the - * verification like dead code sanitization. do_check_common() for subprogram N - * may analyze many other subprograms. sanitize_insn_aux_data() clears all - * temporary data after do_check_common() finds that subprogram N cannot be - * verified independently. pass_cnt counts the number of times - * do_check_common() was run and insn->aux->seen tells the pass number - * insn_aux_data was touched. These variables are compared to clear temporary - * data from failed pass. For testing and experiments do_check_common() can be - * run multiple times even when prior attempt to verify is unsuccessful. - * - * Note that special handling is needed on !env->bypass_spec_v1 if this is - * ever called outside of error path with subsequent program rejection. - */ -static void sanitize_insn_aux_data(struct bpf_verifier_env *env) -{ - struct bpf_insn *insn = env->prog->insnsi; - struct bpf_insn_aux_data *aux; - int i, class; - - for (i = 0; i < env->prog->len; i++) { - class = BPF_CLASS(insn[i].code); - if (class != BPF_LDX && class != BPF_STX) - continue; - aux = &env->insn_aux_data[i]; - if (aux->seen != env->pass_cnt) - continue; - memset(aux, 0, offsetof(typeof(*aux), orig_idx)); - } -} - static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -12859,9 +12828,6 @@ out: if (!ret && pop_log) bpf_vlog_reset(&env->log, 0); free_states(env); - if (ret) - /* clean aux data in case subprog was rejected */ - sanitize_insn_aux_data(env); return ret; } -- cgit v1.2.3 From e042aa532c84d18ff13291d00620502ce7a38dda Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 09:18:21 +0000 Subject: bpf: Fix pointer arithmetic mask tightening under state pruning In 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask") we narrowed the offset mask for unprivileged pointer arithmetic in order to mitigate a corner case where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of- bounds in order to leak kernel memory via side-channel to user space. The verifier's state pruning for scalars leaves one corner case open where in the first verification path R_x holds an unknown scalar with an aux->alu_limit of e.g. 7, and in a second verification path that same register R_x, here denoted as R_x', holds an unknown scalar which has tighter bounds and would thus satisfy range_within(R_x, R_x') as well as tnum_in(R_x, R_x') for state pruning, yielding an aux->alu_limit of 3: Given the second path fits the register constraints for pruning, the final generated mask from aux->alu_limit will remain at 7. While technically not wrong for the non-speculative domain, it would however be possible to craft similar cases where the mask would be too wide as in 7fedb63a8307. One way to fix it is to detect the presence of unknown scalar map pointer arithmetic and force a deeper search on unknown scalars to ensure that we do not run into a masking mismatch. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e774ecc1cd1f..7ba7e800d472 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -414,6 +414,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; bool allow_ptr_to_map_access; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8a7a28b4cfb9..657062cb4d85 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6561,6 +6561,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + /* Limit pruning on unknown scalars to enable deep search for + * potential masking differences from other program paths. + */ + if (!off_is_imm) + env->explore_alu_limits = true; } err = update_alu_sanitation_state(aux, alu_state, alu_limit); @@ -9936,8 +9942,8 @@ next: } /* Returns true if (rold safe implies rcur safe) */ -static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) +static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { bool equal; @@ -9963,6 +9969,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; switch (rold->type) { case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; if (rcur->type == SCALAR_VALUE) { if (!rold->precise && !rcur->precise) return true; @@ -10053,9 +10061,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } -static bool stacksafe(struct bpf_func_state *old, - struct bpf_func_state *cur, - struct bpf_id_pair *idmap) +static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + struct bpf_func_state *cur, struct bpf_id_pair *idmap) { int i, spi; @@ -10100,9 +10107,8 @@ static bool stacksafe(struct bpf_func_state *old, continue; if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; - if (!regsafe(&old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, - idmap)) + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. @@ -10159,10 +10165,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + if (!regsafe(env, &old->regs[i], &cur->regs[i], + env->idmap_scratch)) return false; - if (!stacksafe(old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; if (!refsafe(old, cur)) -- cgit v1.2.3 From a6c39de76d709f30982d4b80a9b9537e1d388858 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 13:15:33 +0000 Subject: bpf, selftests: Add test cases for pointer alu from multiple paths Add several test cases for checking update_alu_sanitation_state() under multiple paths: # ./test_verifier [...] #1061/u map access: known scalar += value_ptr unknown vs const OK #1061/p map access: known scalar += value_ptr unknown vs const OK #1062/u map access: known scalar += value_ptr const vs unknown OK #1062/p map access: known scalar += value_ptr const vs unknown OK #1063/u map access: known scalar += value_ptr const vs const (ne) OK #1063/p map access: known scalar += value_ptr const vs const (ne) OK #1064/u map access: known scalar += value_ptr const vs const (eq) OK #1064/p map access: known scalar += value_ptr const vs const (eq) OK #1065/u map access: known scalar += value_ptr unknown vs unknown (eq) OK #1065/p map access: known scalar += value_ptr unknown vs unknown (eq) OK #1066/u map access: known scalar += value_ptr unknown vs unknown (lt) OK #1066/p map access: known scalar += value_ptr unknown vs unknown (lt) OK #1067/u map access: known scalar += value_ptr unknown vs unknown (gt) OK #1067/p map access: known scalar += value_ptr unknown vs unknown (gt) OK [...] Summary: 1762 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 229 +++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc..2debba4e8a3a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1,3 +1,232 @@ +{ + "map access: known scalar += value_ptr unknown vs const", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs unknown", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (ne)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (lt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (gt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, { "map access: known scalar += value_ptr from different maps", .insns = { -- cgit v1.2.3 From cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Jul 2021 11:27:03 +0800 Subject: netfilter: nf_tables: fix audit memory leak in nf_tables_commit In nf_tables_commit, if nf_tables_commit_audit_alloc fails, it does not free the adp variable. Fix this by adding nf_tables_commit_audit_free which frees the linked list with the head node adl. backtrace: kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] nf_tables_commit_audit_alloc net/netfilter/nf_tables_api.c:8439 [inline] nf_tables_commit+0x16e/0x1760 net/netfilter/nf_tables_api.c:8508 nfnetlink_rcv_batch+0x512/0xa80 net/netfilter/nfnetlink.c:562 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0x1fa/0x220 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x2c7/0x3e0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x36b/0x6b0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:702 [inline] sock_sendmsg+0x56/0x80 net/socket.c:722 Reported-by: syzbot Reported-by: kernel test robot Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table") Signed-off-by: Dongliang Mu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index de182d1f7c4e..081437dd75b7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl, return 0; } +static void nf_tables_commit_audit_free(struct list_head *adl) +{ + struct nft_audit_data *adp, *adn; + + list_for_each_entry_safe(adp, adn, adl, list) { + list_del(&adp->list); + kfree(adp); + } +} + static void nf_tables_commit_audit_collect(struct list_head *adl, struct nft_table *table, u32 op) { @@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } if (trans->msg_type == NFT_MSG_NEWRULE || @@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } } -- cgit v1.2.3 From d6371c76e20d7d3f61b05fd67b596af4d14a8886 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 19 Jul 2021 09:51:34 +0100 Subject: bpf: Fix OOB read when printing XDP link fdinfo We got the following UBSAN report on one of our testing machines: ================================================================================ UBSAN: array-index-out-of-bounds in kernel/bpf/syscall.c:2389:24 index 6 is out of range for type 'char *[6]' CPU: 43 PID: 930921 Comm: systemd-coredum Tainted: G O 5.10.48-cloudflare-kasan-2021.7.0 #1 Hardware name: Call Trace: dump_stack+0x7d/0xa3 ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds.cold+0x43/0x48 ? seq_printf+0x17d/0x250 bpf_link_show_fdinfo+0x329/0x380 ? bpf_map_value_size+0xe0/0xe0 ? put_files_struct+0x20/0x2d0 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 seq_show+0x3f7/0x540 seq_read_iter+0x3f8/0x1040 seq_read+0x329/0x500 ? seq_read_iter+0x1040/0x1040 ? __fsnotify_parent+0x80/0x820 ? __fsnotify_update_child_dentry_flags+0x380/0x380 vfs_read+0x123/0x460 ksys_read+0xed/0x1c0 ? __x64_sys_pwrite64+0x1f0/0x1f0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================================ ================================================================================ UBSAN: object-size-mismatch in kernel/bpf/syscall.c:2384:2 From the report, we can infer that some array access in bpf_link_show_fdinfo at index 6 is out of bounds. The obvious candidate is bpf_link_type_strs[BPF_LINK_TYPE_XDP] with BPF_LINK_TYPE_XDP == 6. It turns out that BPF_LINK_TYPE_XDP is missing from bpf_types.h and therefore doesn't have an entry in bpf_link_type_strs: pos: 0 flags: 02000000 mnt_id: 13 link_type: (null) link_id: 4 prog_tag: bcf7977d3b93787c prog_id: 4 ifindex: 1 Fixes: aa8d3a716b59 ("bpf, xdp: Add bpf_link-based XDP attachment API") Signed-off-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210719085134.43325-2-lmb@cloudflare.com --- include/linux/bpf_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a9db1eae6796..ae3ac3a2018c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) +BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #endif -- cgit v1.2.3 From ec61cd49bf566401306cfc4855bda8c08bbaa46c Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 28 Jun 2021 14:37:13 +0200 Subject: mac80211: Do not strip skb headroom on monitor frames When a monitor interface is present together with other interfaces, a received skb is copied and received on the monitor netdev. Before, the copied skb was allocated with exactly the amount of space needed for the radiotap header, resulting in an skb without any headroom at all being received on the monitor netdev. With the introduction of eBPF and XDP in the kernel, skbs may be processed by custom eBPF programs. However, since the skb cannot be reallocated in the eBPF program, no more data or headers can be pushed. The old code made sure the final headroom was zero regardless of the value of NET_SKB_PAD, so increasing that constant would have no effect. Now we allocate monitor skb copies with a headroom of NET_SKB_PAD bytes before the radiotap header. Monitor interfaces now behave in the same way as other netdev interfaces that honor the NET_SKB_PAD constant. Signed-off-by: Johan Almbladh Link: https://lore.kernel.org/r/20210628123713.2070753-1-johan.almbladh@anyfinetworks.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 771921c057e8..2563473b5cf1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local, * Need to make a copy and possibly remove radiotap header * and FCS from the original. */ - skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC); + skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD, + 0, GFP_ATOMIC); if (!skb) return NULL; -- cgit v1.2.3 From 1a7915501ca94a1f10288defe333cd5ade210b63 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jun 2021 13:28:53 +0200 Subject: mac80211: fix starting aggregation sessions on mesh interfaces The logic for starting aggregation sessions was recently moved from minstrel_ht to mac80211, into the subif tx handler just after the sta lookup. Unfortunately this didn't work for mesh interfaces, since the sta lookup is deferred until a much later point in time on those. Fix this by also calling the aggregation check right after the deferred sta lookup. Fixes: 08a46c642001 ("mac80211: move A-MPDU session check from minstrel_ht to mac80211") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210629112853.29785-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 57 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e96981144358..8509778ff31f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, return queued; } +static void +ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct rate_control_ref *ref = sdata->local->rate_ctrl; + u16 tid; + + if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) + return; + + if (!sta || !sta->sta.ht_cap.ht_supported || + !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || + skb->protocol == sdata->control_port_protocol) + return; + + tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(&sta->sta, tid, 0); +} + /* * initialises @tx * pass %NULL for the station if unknown, a valid pointer if known @@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool aggr_check = false; int tid; memset(tx, 0, sizeof(*tx)); @@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, } else if (tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } - if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) + if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) { tx->sta = sta_info_get(sdata, hdr->addr1); + aggr_check = true; + } } if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && @@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx; tid = ieee80211_get_tid(hdr); - tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx && aggr_check) { + ieee80211_aggr_check(sdata, tx->sta, skb); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + } + if (tid_tx) { bool queued; @@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) } EXPORT_SYMBOL(ieee80211_txq_schedule_start); -static void -ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct rate_control_ref *ref = sdata->local->rate_ctrl; - u16 tid; - - if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) - return; - - if (!sta || !sta->sta.ht_cap.ht_supported || - !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || - skb->protocol == sdata->control_port_protocol) - return; - - tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - if (likely(sta->ampdu_mlme.tid_tx[tid])) - return; - - ieee80211_start_tx_ba_session(&sta->sta, tid, 0); -} - void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, -- cgit v1.2.3 From a5d3cbdb09ff1f52cbe040932e06c8b9915c6dad Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Jul 2021 07:01:11 +0200 Subject: mac80211: fix enabling 4-address mode on a sta vif after assoc Notify the driver about the 4-address mode change and also send a nulldata packet to the AP to notify it about the change Fixes: 1ff4e8f2dec8 ("mac80211: notify the driver when a sta uses 4-address mode") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210702050111.47546-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 19 +++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 84cc7733ea66..4e6f11e63df3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; int ret; ret = ieee80211_if_change_type(sdata, type); @@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy, RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (params->use_4addr == ifmgd->use_4addr) + return 0; + sdata->u.mgd.use_4addr = params->use_4addr; + if (!ifmgd->associated) + return 0; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ifmgd->bssid); + if (sta) + drv_sta_set_4addr(local, sdata, &sta->sta, + params->use_4addr); + mutex_unlock(&local->sta_mtx); + + if (params->use_4addr) + ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 22549b95d1aa..30ce6d2ec7ce 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack, u16 tx_time); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a00f11a33699..c0ea3b1aa9e1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; struct ieee80211_hdr *nullfunc; -- cgit v1.2.3 From 17109e9783799be2a063b2bd861a508194b0a487 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 6 Jul 2021 17:44:23 +0200 Subject: virt_wifi: fix error on connect When connecting without first doing a scan, the BSS list is empty and __cfg80211_connect_result() generates this warning: $ iw dev wlan0 connect -w VirtWifi [ 15.371989] ------------[ cut here ]------------ [ 15.372179] WARNING: CPU: 0 PID: 92 at net/wireless/sme.c:756 __cfg80211_connect_result+0x402/0x440 [ 15.372383] CPU: 0 PID: 92 Comm: kworker/u2:2 Not tainted 5.13.0-kvm #444 [ 15.372512] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-3.fc34 04/01/2014 [ 15.372597] Workqueue: cfg80211 cfg80211_event_work [ 15.372756] RIP: 0010:__cfg80211_connect_result+0x402/0x440 [ 15.372818] Code: 48 2b 04 25 28 00 00 00 75 59 48 8b 3b 48 8b 76 10 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d 49 8d 65 f0 41 5d e9 d0 d4 fd ff 0f 0b <0f> 0b e9 f6 fd ff ff e8 f2 4a b4 ff e9 ec fd ff ff 0f 0b e9 19 fd [ 15.372966] RSP: 0018:ffffc900005cbdc0 EFLAGS: 00010246 [ 15.373022] RAX: 0000000000000000 RBX: ffff8880028e2400 RCX: ffff8880028e2472 [ 15.373088] RDX: 0000000000000002 RSI: 00000000fffffe01 RDI: ffffffff815335ba [ 15.373149] RBP: ffffc900005cbe00 R08: 0000000000000008 R09: ffff888002bdf8b8 [ 15.373209] R10: ffff88803ec208f0 R11: ffffffffffffe9ae R12: ffff88801d687d98 [ 15.373280] R13: ffff88801b5fe000 R14: ffffc900005cbdc0 R15: dead000000000100 [ 15.373330] FS: 0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 [ 15.373382] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.373425] CR2: 000056421c468958 CR3: 000000001b458001 CR4: 0000000000170eb0 [ 15.373478] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 15.373529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 15.373580] Call Trace: [ 15.373611] ? cfg80211_process_wdev_events+0x10e/0x170 [ 15.373743] cfg80211_process_wdev_events+0x10e/0x170 [ 15.373783] cfg80211_process_rdev_events+0x21/0x40 [ 15.373846] cfg80211_event_work+0x20/0x30 [ 15.373892] process_one_work+0x1e9/0x340 [ 15.373956] worker_thread+0x4b/0x3f0 [ 15.374017] ? process_one_work+0x340/0x340 [ 15.374053] kthread+0x11f/0x140 [ 15.374089] ? set_kthread_struct+0x30/0x30 [ 15.374153] ret_from_fork+0x1f/0x30 [ 15.374187] ---[ end trace 321ef0cb7e9c0be1 ]--- wlan0 (phy #0): connected to 00:00:00:00:00:00 Add the fake bss just before the connect so that cfg80211_get_bss() finds the virtual network. As some code was duplicated, move it in a common function. Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20210706154423.11065-1-mcroce@linux.microsoft.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virt_wifi.c | 52 ++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 1df959532c7d..514f2c1124b6 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = { /* Assigned at module init. Guaranteed locally-administered and unicast. */ static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; +static void virt_wifi_inform_bss(struct wiphy *wiphy) +{ + u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); + struct cfg80211_bss *informed_bss; + static const struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, + .len = 8, + .ssid = "VirtWifi", + }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, tsf, + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); +} + /* Called with the rtnl lock held. */ static int virt_wifi_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) @@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy, /* Acquires and releases the rdev BSS lock. */ static void virt_wifi_scan_result(struct work_struct *work) { - struct { - u8 tag; - u8 len; - u8 ssid[8]; - } __packed ssid = { - .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", - }; - struct cfg80211_bss *informed_bss; struct virt_wifi_wiphy_priv *priv = container_of(work, struct virt_wifi_wiphy_priv, scan_result.work); struct wiphy *wiphy = priv_to_wiphy(priv); struct cfg80211_scan_info scan_info = { .aborted = false }; - u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); - informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, - CFG80211_BSS_FTYPE_PRESP, - fake_router_bssid, tsf, - WLAN_CAPABILITY_ESS, 0, - (void *)&ssid, sizeof(ssid), - DBM_TO_MBM(-50), GFP_KERNEL); - cfg80211_put_bss(wiphy, informed_bss); + virt_wifi_inform_bss(wiphy); /* Schedules work which acquires and releases the rtnl lock. */ cfg80211_scan_done(priv->scan_request, &scan_info); @@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, if (!could_schedule) return -EBUSY; - if (sme->bssid) + if (sme->bssid) { ether_addr_copy(priv->connect_requested_bss, sme->bssid); - else + } else { + virt_wifi_inform_bss(wiphy); eth_zero_addr(priv->connect_requested_bss); + } wiphy_debug(wiphy, "connect\n"); @@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work) struct virt_wifi_netdev_priv *priv = container_of(work, struct virt_wifi_netdev_priv, connect.work); u8 *requested_bss = priv->connect_requested_bss; - bool has_addr = !is_zero_ether_addr(requested_bss); bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); u16 status = WLAN_STATUS_SUCCESS; - if (!priv->is_up || (has_addr && !right_addr)) + if (is_zero_ether_addr(requested_bss)) + requested_bss = NULL; + + if (!priv->is_up || (requested_bss && !right_addr)) status = WLAN_STATUS_UNSPECIFIED_FAILURE; else priv->is_connected = true; -- cgit v1.2.3 From 0d059964504a1605d84938c0b5b38f6573121c4a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 Jul 2021 21:53:30 +0200 Subject: nl80211: limit band information in non-split data In non-split data, we shouldn't be adding S1G and 6 GHz data (or future bands) since we're really close to the 4k message size limit. Remove those bands, any modern userspace that can use S1G or 6 GHz should already be using split dumps, and if not then it needs to update. Link: https://lore.kernel.org/r/20210712215329.31444162a2c2.I5555312e4a074c84f8b4e7ad79dc4d1fbfc5126c@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50eb405b0690..16c88beea48b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < NUM_NL80211_BANDS; band++) { + band < (state->split ? + NUM_NL80211_BANDS : + NL80211_BAND_60GHZ + 1); + band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ -- cgit v1.2.3 From f9a5c358c8d26fed0cc45f2afc64633d4ba21dff Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Mon, 28 Jun 2021 21:23:34 +0800 Subject: cfg80211: Fix possible memory leak in function cfg80211_bss_update When we exceed the limit of BSS entries, this function will free the new entry, however, at this time, it is the last door to access the inputed ies, so these ies will be unreferenced objects and cause memory leak. Therefore we should free its ies before deallocating the new entry, beside of dropping it from hidden_list. Signed-off-by: Nguyen Dinh Phi Link: https://lore.kernel.org/r/20210628132334.851095-1-phind.uet@gmail.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f03c7ac8e184..7897b1478c3c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1754,16 +1754,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, * be grouped with this beacon for updates ... */ if (!cfg80211_combine_bsses(rdev, new)) { - kfree(new); + bss_ref_put(rdev, new); goto drop; } } if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { - if (!list_empty(&new->hidden_list)) - list_del(&new->hidden_list); - kfree(new); + bss_ref_put(rdev, new); goto drop; } -- cgit v1.2.3 From 32c3973d808301e7a980f80fee8818fdf7c82b09 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:10:29 +0200 Subject: netfilter: flowtable: avoid possible false sharing The flowtable follows the same timeout approach as conntrack, use the same idiom as in cc16921351d8 ("netfilter: conntrack: avoid same-timeout update") but also include the fix provided by e37542ba111f ("netfilter: conntrack: avoid possible false sharing"). Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1e50908b1b7e..551976e4284c 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + u32 timeout; + + timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + if (READ_ONCE(flow->timeout) != timeout) + WRITE_ONCE(flow->timeout, timeout); if (likely(!nf_flowtable_hw_offload(flow_table))) return; -- cgit v1.2.3 From 32953df7a6eb56bd9b8f18a13034d55f9fc96cfa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:20:08 +0200 Subject: netfilter: nft_last: avoid possible false sharing Use the idiom described in: https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance Moreover, prevent a compiler optimization. Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 8088b99f2ee3..304e33cbed9b 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); - priv->last_jiffies = jiffies; - priv->last_set = 1; + if (READ_ONCE(priv->last_jiffies) != jiffies) + WRITE_ONCE(priv->last_jiffies, jiffies); + if (READ_ONCE(priv->last_set) == 0) + WRITE_ONCE(priv->last_set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); + unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); + u32 last_set = READ_ONCE(priv->last_set); __be64 msecs; - if (time_before(jiffies, priv->last_jiffies)) - priv->last_set = 0; + if (time_before(jiffies, last_jiffies)) { + WRITE_ONCE(priv->last_set, 0); + last_set = 0; + } - if (priv->last_set) - msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies); + if (last_set) + msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies); else msecs = 0; - if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) || + if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) || nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) goto nla_put_failure; -- cgit v1.2.3 From 30a56a2b881821625f79837d4d968c679852444e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Jul 2021 18:36:00 +0200 Subject: netfilter: conntrack: adjust stop timestamp to real expiry value In case the entry is evicted via garbage collection there is delay between the timeout value and the eviction event. This adjusts the stop value based on how much time has passed. Fixes: b87a2f9199ea82 ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c52df85870..5c03e5106751 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) return false; tstamp = nf_conn_tstamp_find(ct); - if (tstamp && tstamp->stop == 0) + if (tstamp) { + s32 timeout = ct->timeout - nfct_time_stamp; + tstamp->stop = ktime_get_real_ns(); + if (timeout < 0) + tstamp->stop -= jiffies_to_nsecs(-timeout); + } if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { -- cgit v1.2.3 From a33f387ecd5aafae514095c2c4a8c24f7aea7e8b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Jul 2021 18:22:50 +0200 Subject: netfilter: nft_nat: allow to specify layer 4 protocol NAT only nft_nat reports a bogus EAFNOSUPPORT if no layer 3 information is specified. Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0840c635b752..be1595d6979d 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, alen = sizeof_field(struct nf_nat_range, min_addr.ip6); break; default: - return -EAFNOSUPPORT; + if (tb[NFTA_NAT_REG_ADDR_MIN]) + return -EAFNOSUPPORT; + break; } priv->family = family; -- cgit v1.2.3 From 217e26bd87b2930856726b48a4e71c768b8c9bf5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:22:32 +0200 Subject: netfilter: nfnl_hook: fix unused variable warning The only user of this variable is in an #ifdef: net/netfilter/nfnetlink_hook.c: In function 'nfnl_hook_entries_head': net/netfilter/nfnetlink_hook.c:177:28: error: unused variable 'netdev' [-Werror=unused-variable] Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 50b4e3c9347a..202f57d17bab 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -174,7 +174,9 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; +#ifdef CONFIG_NETFILTER_INGRESS struct net_device *netdev; +#endif switch (pf) { case NFPROTO_IPV4: -- cgit v1.2.3 From 65662a8dcdd01342b71ee44234bcfd0162e195af Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 29 Apr 2021 19:49:47 +0200 Subject: i40e: Fix logic of disabling queues Correct the message flow between driver and firmware when disabling queues. Previously in case of PF reset (due to required reinit after reconfig), the error like: "VSI seid 397 Tx ring 60 disable timeout" could show up occasionally. The error was not a real issue of hardware or firmware, it was caused by wrong sequence of messages invoked by the driver. Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 58 +++++++++++++++++------------ 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 861e59a350bd..5297e6c59083 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4454,11 +4454,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; @@ -4467,7 +4466,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, - false /*is xdp*/, enable); + false /*is xdp*/, true); if (ret) break; @@ -4476,7 +4475,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, - true /*is xdp*/, enable); + true /*is xdp*/, true); if (ret) break; } @@ -4574,32 +4573,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_rx - Start or stop a VSI's rings + * i40e_vsi_enable_rx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - ret = i40e_control_wait_rx_q(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + "VSI seid %d Rx ring %d enable timeout\n", + vsi->seid, pf_q); break; } } - /* Due to HW errata, on Rx disable only, the register can indicate done - * before it really is. Needs 50ms to be sure - */ - if (!enable) - mdelay(50); - return ret; } @@ -4612,29 +4604,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) int ret = 0; /* do rx first for enable and last for disable */ - ret = i40e_vsi_control_rx(vsi, true); + ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; - ret = i40e_vsi_control_tx(vsi, true); + ret = i40e_vsi_enable_tx(vsi); return ret; } +#define I40E_DISABLE_TX_GAP_MSEC 50 + /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { + struct i40e_pf *pf = vsi->back; + int pf_q, err, q_end; + /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - /* do rx first for enable and last for disable - * Ignore return value, we need to shutdown whatever we can - */ - i40e_vsi_control_tx(vsi, false); - i40e_vsi_control_rx(vsi, false); + q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); + + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) { + err = i40e_control_wait_rx_q(pf, pf_q, false); + if (err) + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d dissable timeout\n", + vsi->seid, pf_q); + } + + msleep(I40E_DISABLE_TX_GAP_MSEC); + pf_q = vsi->base_queue; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); + + i40e_vsi_wait_queues_disabled(vsi); } /** -- cgit v1.2.3 From 71d6fdba4b2d82fdd883fec31dee77fbcf59773a Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 21 May 2021 18:41:26 +0200 Subject: i40e: Fix firmware LLDP agent related warning Make warning meaningful for the user. Previously the trace: "Starting FW LLDP agent failed: error: I40E_ERR_ADMIN_QUEUE_ERROR, I40E_AQ_RC_EAGAIN" was produced when user tried to start Firmware LLDP agent, just after it was stopped with sequence: ethtool --set-priv-flags disable-fw-lldp on ethtool --set-priv-flags disable-fw-lldp off (without any delay between the commands) At that point the firmware is still processing stop command, the behavior is expected. Fixes: c1041d070437 ("i40e: Missing response checks in driver when starting/stopping FW LLDP") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3e822bad4851..d9e26f9713a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5294,6 +5294,10 @@ flags_complete: dev_warn(&pf->pdev->dev, "Device configuration forbids SW from starting the LLDP agent.\n"); return -EINVAL; + case I40E_AQ_RC_EAGAIN: + dev_warn(&pf->pdev->dev, + "Stop FW LLDP agent command is still being processed, please try again in a second.\n"); + return -EBUSY; default: dev_warn(&pf->pdev->dev, "Starting FW LLDP agent failed: error: %s, %s\n", -- cgit v1.2.3 From dc614c46178b0b89bde86ac54fc687a28580d2b7 Mon Sep 17 00:00:00 2001 From: Lukasz Cieplicki Date: Mon, 31 May 2021 16:55:49 +0000 Subject: i40e: Add additional info to PHY type error In case of PHY type error occurs, the message was too generic. Add additional info to PHY type error indicating that it can be wrong cable connected. Fixes: 124ed15bf126 ("i40e: Add dual speed module support") Signed-off-by: Lukasz Cieplicki Signed-off-by: Michal Maloszewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index d9e26f9713a5..2c9e4eeb7270 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, default: /* if we got here and link is up something bad is afoot */ netdev_info(netdev, - "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n", hw_link_info->phy_type); } -- cgit v1.2.3 From 89ec1f0886c127c7e41ac61a6b6d539f4fb2510b Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Wed, 2 Jun 2021 00:47:03 +0000 Subject: i40e: Fix queue-to-TC mapping on Tx In SW DCB mode the packets sent receive incorrect UP tags. They are constructed correctly and put into tx_ring, but UP is later remapped by HW on the basis of TCTUPR register contents according to Tx queue selected, and BW used is consistent with the new UP values. This is caused by Tx queue selection in kernel not taking into account DCB configuration. This patch fixes the issue by implementing the ndo_select_queue NDO callback. Fixes: fd0a05ce74ef ("i40e: transmit, receive, and NAPI") Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 50 +++++++++++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 ++ 3 files changed, 53 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5297e6c59083..278077208f37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13271,6 +13271,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 38eb8151ee9a..3f25bd8c4924 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3631,6 +3631,56 @@ dma_error: return -1; } +static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev, + const struct sk_buff *skb, + u16 num_tx_queues) +{ + u32 jhash_initval_salt = 0xd631614b; + u32 hash; + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16)skb->protocol ^ skb->hash; + + hash = jhash_1word(hash, jhash_initval_salt); + + return (u16)(((u64)hash * num_tx_queues) >> 32); +} + +u16 i40e_lan_select_queue(struct net_device *netdev, + struct sk_buff *skb, + struct net_device __always_unused *sb_dev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_hw *hw; + u16 qoffset; + u16 qcount; + u8 tclass; + u16 hash; + u8 prio; + + /* is DCB enabled at all? */ + if (vsi->tc_config.numtc == 1) + return i40e_swdcb_skb_tx_hash(netdev, skb, + netdev->real_num_tx_queues); + + prio = skb->priority; + hw = &vsi->back->hw; + tclass = hw->local_dcbx_config.etscfg.prioritytable[prio]; + /* sanity check */ + if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass)))) + tclass = 0; + + /* select a queue assigned for the given TC */ + qcount = vsi->tc_config.tc_info[tclass].qcount; + hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount); + + qoffset = vsi->tc_config.tc_info[tclass].qoffset; + return qoffset + hash; +} + /** * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring * @xdpf: data to transmit diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 86fed05b4f19..bfc2845c99d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); void i40e_clean_rx_ring(struct i40e_ring *rx_ring); int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring); -- cgit v1.2.3 From ea52faae1d17cd3048681d86d2e8641f44de484d Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 18 Jun 2021 08:49:49 +0000 Subject: i40e: Fix log TC creation failure when max num of queues is exceeded Fix missing failed message if driver does not have enough queues to complete TC command. Without this fix no message is displayed in dmesg. Fixes: a9ce82f744dc ("i40e: Enable 'channel' mode in mqprio for TC configs") Signed-off-by: Grzegorz Szczurek Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 278077208f37..1d1f52756a93 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7290,6 +7290,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, } if (vsi->num_queue_pairs < (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + dev_err(&vsi->back->pdev->dev, + "Failed to create traffic channel, insufficient number of queues.\n"); return -EINVAL; } if (sum_max_rate > i40e_get_link_speed(vsi)) { -- cgit v1.2.3 From d72e91efcae12f2f24ced984d00d60517c677857 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 22 Jul 2021 18:15:51 +0530 Subject: octeontx2-af: Remove unnecessary devm_kfree Remove devm_kfree of memory where VLAN entry to RVU PF mapping info is saved. This will be freed anyway at driver exit. Having this could result in warning from devm_kfree() if the memory is not allocated due to errors in rvu_nix_block_init() before nix_setup_txvlan(). Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0933699a0d2d..0d2cd5169018 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3842,7 +3842,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr, vlan = &nix_hw->txvlan; kfree(vlan->rsrc.bmap); mutex_destroy(&vlan->rsrc_lock); - devm_kfree(rvu->dev, vlan->entry2pfvf_map); mcast = &nix_hw->mcast; qmem_free(rvu->dev, mcast->mce_ctx); -- cgit v1.2.3 From f8dd60de194817c86bf812700980762bb5a8d9a4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 22 Jul 2021 12:05:41 -0400 Subject: tipc: fix implicit-connect for SYN+ For implicit-connect, when it's either SYN- or SYN+, an ACK should be sent back to the client immediately. It's not appropriate for the client to enter established state only after receiving data from the server. On client side, after the SYN is sent out, tipc_wait_for_connect() should be called to wait for the ACK if timeout is set. This patch also restricts __tipc_sendstream() to call __sendmsg() only when it's in TIPC_OPEN state, so that the client can program in a single loop doing both connecting and data sending like: for (...) sendmsg(dest, buf); This makes the implicit-connect more implicit. Fixes: b97bf3fd8f6a ("[TIPC] Initial merge") Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 34a97ea36cc8..ebd300c26a44 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = 0; } - if (unlikely(syn && !rc)) + if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); + if (timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); + } + } return rc ? rc : dlen; } @@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) return -EMSGSIZE; /* Handle implicit connection setup */ - if (unlikely(dest)) { + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); @@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; + struct msghdr m = {NULL,}; struct tipc_msg *msg; + struct sk_buff *buf; long timeo; int res; @@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. + * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - tsk_advance_rx_queue(sk); - __tipc_sendstream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); -- cgit v1.2.3 From d237a7f11719ff9320721be5818352e48071aab6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 23 Jul 2021 09:25:34 +0700 Subject: tipc: fix sleeping in tipc accept routine The release_sock() is blocking function, it would change the state after sleeping. In order to evaluate the stated condition outside the socket lock context, switch to use wait_woken() instead. Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/socket.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd300c26a44..75b99b7eda22 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2652,7 +2652,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2661,12 +2661,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2678,7 +2678,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } -- cgit v1.2.3 From 227adfb2b1dfbc53dfc53b9dd7a93a6298ff7c56 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 22 Jul 2021 20:01:28 +0300 Subject: net: Set true network header for ECN decapsulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the header straight after the tunnel header was another ethernet header (TEB), instead of the network header, the ECN decapsulation code would treat the ethernet header as if it was an IP header, resulting in mishandling and possible wrong drops or corruption of the IP header. In this case, ECT(1) is sent, so IP_ECN_decapsulate tries to copy it to the inner IPv4 header, and correct its checksum. The offset of the ECT bits in an IPv4 header corresponds to the lower 2 bits of the second octet of the destination MAC address in the ethernet header. The IPv4 checksum corresponds to end of the source address. In order to reproduce: $ ip netns add A $ ip netns add B $ ip -n A link add _v0 type veth peer name _v1 netns B $ ip -n A link set _v0 up $ ip -n A addr add dev _v0 10.254.3.1/24 $ ip -n A route add default dev _v0 scope global $ ip -n B link set _v1 up $ ip -n B addr add dev _v1 10.254.1.6/24 $ ip -n B route add default dev _v1 scope global $ ip -n B link add gre1 type gretap local 10.254.1.6 remote 10.254.3.1 key 0x49000000 $ ip -n B link set gre1 up # Now send an IPv4/GRE/Eth/IPv4 frame where the outer header has ECT(1), # and the inner header has no ECT bits set: $ cat send_pkt.py #!/usr/bin/env python3 from scapy.all import * pkt = IP(b'E\x01\x00\xa7\x00\x00\x00\x00@/`%\n\xfe\x03\x01\n\xfe\x01\x06 \x00eXI\x00' b'\x00\x00\x18\xbe\x92\xa0\xee&\x18\xb0\x92\xa0l&\x08\x00E\x00\x00}\x8b\x85' b'@\x00\x01\x01\xe4\xf2\x82\x82\x82\x01\x82\x82\x82\x02\x08\x00d\x11\xa6\xeb' b'3\x1e\x1e\\xf3\\xf7`\x00\x00\x00\x00ZN\x00\x00\x00\x00\x00\x00\x10\x11\x12' b'\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./01234' b'56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ') send(pkt) $ sudo ip netns exec B tcpdump -neqlllvi gre1 icmp & ; sleep 1 $ sudo ip netns exec A python3 send_pkt.py In the original packet, the source/destinatio MAC addresses are dst=18:be:92:a0:ee:26 src=18:b0:92:a0:6c:26 In the received packet, they are dst=18:bd:92:a0:ee:26 src=18:b0:92:a0:6c:27 Thanks to Lahav Schlesinger and Isaac Garzon for helping me pinpoint the origin. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Jakub Kicinski Cc: Toke Høiland-Jørgensen Signed-off-by: Gilad Naaman Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0dca00745ac3..be75b409445c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - skb_reset_network_header(skb); + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { -- cgit v1.2.3 From 46c7655f0b56b1ac864115441064cde9ed124f4a Mon Sep 17 00:00:00 2001 From: Kangmin Park Date: Fri, 23 Jul 2021 02:44:43 +0900 Subject: ipv6: decrease hop limit counter in ip6_forward() Decrease hop limit counter when deliver skb to ndp proxy. Signed-off-by: Kangmin Park Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e1b9f7ac8bad..8e6ca9ad6812 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -549,9 +549,10 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); - if (proxied > 0) + if (proxied > 0) { + hdr->hop_limit--; return ip6_input(skb); - else if (proxied < 0) { + } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } -- cgit v1.2.3 From c92c74131a84b508aa8f079a25d7bbe10748449e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 16:05:51 +0300 Subject: net: dsa: mv88e6xxx: silently accept the deletion of VID 0 too The blamed commit modified the driver to accept the addition of VID 0 without doing anything, but deleting that VID still fails: [ 32.080780] mv88e6085 d0032004.mdio-mii:10 lan8: failed to kill vid 0081/0 Modify mv88e6xxx_port_vlan_leave() to do the same thing as the addition. Fixes: b8b79c414eca ("net: dsa: mv88e6xxx: Fix adding vlan 0") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index beb41572d04e..272b0535d946 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2155,7 +2155,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, int i, err; if (!vid) - return -EOPNOTSUPP; + return 0; err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) -- cgit v1.2.3 From 68d1f1d4af188c290087958c75c7b89a816e1137 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 22 Jul 2021 20:21:05 +0200 Subject: wwan: core: Fix missing RTM_NEWLINK event for default link A wwan link created via the wwan_create_default_link procedure is never notified to the user (RTM_NEWLINK), causing issues with user tools relying on such event to track network links (NetworkManager). This is because the procedure misses a call to rtnl_configure_link(), which sets the link as initialized and notifies the new link (cf proper usage in __rtnl_newlink()). Cc: stable@vger.kernel.org Fixes: ca374290aaad ("wwan: core: support default netdev creation") Suggested-by: Sergey Ryazanov Signed-off-by: Loic Poulain Acked-by: Sergey Ryazanov Signed-off-by: David S. Miller --- drivers/net/wwan/wwan_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 3e16c318e705..674a81d79db3 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -984,6 +984,8 @@ static void wwan_create_default_link(struct wwan_device *wwandev, goto unlock; } + rtnl_configure_link(dev, NULL); /* Link initialized, notify new link */ + unlock: rtnl_unlock(); -- cgit v1.2.3 From 9986066d94c971edf19464ed7bf5b26a91520e97 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Fri, 23 Jul 2021 13:36:18 +0530 Subject: octeontx2-af: Fix uninitialized variables in rvu_switch Get the number of VFs of a PF correctly by calling rvu_get_pf_numvfs in rvu_switch_disable function. Also hwvf is not required hence remove it. Fixes: 23109f8dd06d ("octeontx2-af: Introduce internal packet switching") Reported-by: kernel test robot Reported-by: Colin Ian King Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 6 ++++-- drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c | 11 ++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 017163fb3cd5..5fe277e354f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf) /* Get numVFs attached to this PF and first HWVF */ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf)); - *numvfs = (cfg >> 12) & 0xFF; - *hwvf = cfg & 0xFFF; + if (numvfs) + *numvfs = (cfg >> 12) & 0xFF; + if (hwvf) + *hwvf = cfg & 0xFFF; } static int rvu_get_hwvf(struct rvu *rvu, int pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 2e5379710aa5..820adf390b8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) struct rvu_switch *rswitch = &rvu->rswitch; u16 start = rswitch->start_entry; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; u16 pcifunc, entry = 0; + int pf, vf, numvfs; int err; for (pf = 1; pf < hw->total_pfs; pf++) { @@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) rswitch->entry2pcifunc[entry++] = pcifunc; - rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); rvu_get_nix_blkaddr(rvu, pcifunc); @@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu) struct npc_mcam_free_entry_req free_req = { 0 }; struct rvu_switch *rswitch = &rvu->rswitch; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; + int pf, vf, numvfs; struct msg_rsp rsp; u16 pcifunc; int err; @@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); if (err) -- cgit v1.2.3 From 52f3456a96c06760b9bfae460e39596fec7af22e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 23 Jul 2021 18:31:32 +0300 Subject: net: qrtr: fix memory leaks Syzbot reported memory leak in qrtr. The problem was in unputted struct sock. qrtr_local_enqueue() function calls qrtr_port_lookup() which takes sock reference if port was found. Then there is the following check: if (!ipc || &ipc->sk == skb->sk) { ... return -ENODEV; } Since we should drop the reference before returning from this function and ipc can be non-NULL inside this if, we should add qrtr_port_put() inside this if. The similar corner case is in qrtr_endpoint_post() as Manivannan reported. In case of sock_queue_rcv_skb() failure we need to put port reference to avoid leaking struct sock pointer. Fixes: e04df98adf7d ("net: qrtr: Remove receive worker") Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") Reported-and-tested-by: syzbot+35a511c72ea7356cdcf3@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Reviewed-by: Manivannan Sadhasivam Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e6f4a6202f82..171b7f3be6ef 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!ipc) goto err; - if (sock_queue_rcv_skb(&ipc->sk, skb)) + if (sock_queue_rcv_skb(&ipc->sk, skb)) { + qrtr_port_put(ipc); goto err; + } qrtr_port_put(ipc); } @@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ + if (ipc) + qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } -- cgit v1.2.3 From 15bbf8bb4d4ab87108ecf5f4155ec8ffa3c141d6 Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Fri, 23 Jul 2021 16:13:04 +0100 Subject: NIU: fix incorrect error return, missed in previous revert Commit 7930742d6, reverting 26fd962, missed out on reverting an incorrect change to a return value. The niu_pci_vpd_scan_props(..) == 1 case appears to be a normal path - treating it as an error and return -EINVAL was breaking VPD_SCAN and causing the driver to fail to load. Fix, so my Neptune card works again. Cc: Kangjie Lu Cc: Shannon Nelson Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: stable Fixes: 7930742d ('Revert "niu: fix missing checks of niu_pci_eeprom_read"') Signed-off-by: Paul Jakma Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 74e748662ec0..860644d182ab 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start) err = niu_pci_vpd_scan_props(np, here, end); if (err < 0) return err; + /* ret == 1 is not an error */ if (err == 1) - return -EINVAL; + return 0; } return 0; } -- cgit v1.2.3 From 6840e17b8ea992453e2d6f460d403cb05d194e76 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:45 -0700 Subject: ionic: make all rx_mode work threadsafe Move the bulk of the code from ionic_set_rx_mode(), which can be called from atomic context, into ionic_lif_rx_mode() which is a safe context. A call from the stack will get pushed off into a work thread, but it is also possible to simultaneously have a call driven by a queue reconfig request from an ethtool command or fw recovery event. We add a mutex around the rx_mode work to be sure they don't collide. Fixes: 81dbc24147f9 ("ionic: change set_rx_mode from_ndo to can_sleep") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 183 +++++++++++------------- drivers/net/ethernet/pensando/ionic/ionic_lif.h | 4 +- 2 files changed, 85 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index af3a5368529c..7815e9034fb8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -29,7 +29,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = { */ }; -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode); +static void ionic_lif_rx_mode(struct ionic_lif *lif); static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr); static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr); static void ionic_link_status_check(struct ionic_lif *lif); @@ -77,7 +77,7 @@ static void ionic_lif_deferred_work(struct work_struct *work) switch (w->type) { case IONIC_DW_TYPE_RX_MODE: - ionic_lif_rx_mode(lif, w->rx_mode); + ionic_lif_rx_mode(lif); break; case IONIC_DW_TYPE_RX_ADDR_ADD: ionic_lif_addr_add(lif, w->addr); @@ -1301,10 +1301,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } -static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, - bool can_sleep) +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) { - struct ionic_deferred_work *work; unsigned int nmfilters; unsigned int nufilters; @@ -1330,97 +1328,46 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, lif->nucast--; } - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD : - IONIC_DW_TYPE_RX_ADDR_DEL; - memcpy(work->addr, addr, ETH_ALEN); - netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n", - add ? "add" : "del", addr); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - netdev_dbg(lif->netdev, "rx_filter %s %pM\n", - add ? "add" : "del", addr); - if (add) - return ionic_lif_addr_add(lif, addr); - else - return ionic_lif_addr_del(lif, addr); - } + netdev_dbg(lif->netdev, "rx_filter %s %pM\n", + add ? "add" : "del", addr); + if (add) + return ionic_lif_addr_add(lif, addr); + else + return ionic_lif_addr_del(lif, addr); return 0; } static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP); -} - -static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) -{ - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR); } -static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) +static void ionic_lif_rx_mode(struct ionic_lif *lif) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP); -} - -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_mode_set = { - .opcode = IONIC_CMD_RX_MODE_SET, - .lif_index = cpu_to_le16(lif->index), - .rx_mode = cpu_to_le16(rx_mode), - }, - }; + struct net_device *netdev = lif->netdev; + unsigned int nfilters; + unsigned int nd_flags; char buf[128]; - int err; + u16 rx_mode; int i; #define REMAIN(__x) (sizeof(buf) - (__x)) - i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", - lif->rx_mode, rx_mode); - if (rx_mode & IONIC_RX_MODE_F_UNICAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); - if (rx_mode & IONIC_RX_MODE_F_MULTICAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); - if (rx_mode & IONIC_RX_MODE_F_BROADCAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); - if (rx_mode & IONIC_RX_MODE_F_PROMISC) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); - if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); - netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); - - err = ionic_adminq_post_wait(lif, &ctx); - if (err) - netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n", - rx_mode, err); - else - lif->rx_mode = rx_mode; -} + mutex_lock(&lif->config_lock); -static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) -{ - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_deferred_work *work; - unsigned int nfilters; - unsigned int rx_mode; + /* grab the flags once for local use */ + nd_flags = netdev->flags; rx_mode = IONIC_RX_MODE_F_UNICAST; - rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; - rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; - rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; - rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; + rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; + rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; + rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; + rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; /* sync unicast addresses * next check to see if we're in an overflow state @@ -1429,49 +1376,83 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) * we remove our overflow flag and check the netdev flags * to see if we can disable NIC PROMISC */ - if (can_sleep) - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if (netdev_uc_count(netdev) + 1 > nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; lif->uc_overflow = true; } else if (lif->uc_overflow) { lif->uc_overflow = false; - if (!(netdev->flags & IFF_PROMISC)) + if (!(nd_flags & IFF_PROMISC)) rx_mode &= ~IONIC_RX_MODE_F_PROMISC; } /* same for multicast */ - if (can_sleep) - __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); if (netdev_mc_count(netdev) > nfilters) { rx_mode |= IONIC_RX_MODE_F_ALLMULTI; lif->mc_overflow = true; } else if (lif->mc_overflow) { lif->mc_overflow = false; - if (!(netdev->flags & IFF_ALLMULTI)) + if (!(nd_flags & IFF_ALLMULTI)) rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; } + i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", + lif->rx_mode, rx_mode); + if (rx_mode & IONIC_RX_MODE_F_UNICAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); + if (rx_mode & IONIC_RX_MODE_F_MULTICAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); + if (rx_mode & IONIC_RX_MODE_F_BROADCAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); + if (rx_mode & IONIC_RX_MODE_F_PROMISC) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); + if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); + if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER"); + netdev_dbg(netdev, "lif%d %s\n", lif->index, buf); + if (lif->rx_mode != rx_mode) { - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "rxmode change dropped\n"); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_mode_set = { + .opcode = IONIC_CMD_RX_MODE_SET, + .lif_index = cpu_to_le16(lif->index), + }, + }; + int err; + + ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n", + rx_mode, err); + else + lif->rx_mode = rx_mode; + } + + mutex_unlock(&lif->config_lock); +} + +static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_deferred_work *work; + + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "rxmode change dropped\n"); + return; } + work->type = IONIC_DW_TYPE_RX_MODE; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif); } } @@ -3058,6 +3039,7 @@ void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); + mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); ionic_lif_reset(lif); } @@ -3185,7 +3167,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -3202,7 +3184,7 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); return 0; } @@ -3225,6 +3207,7 @@ int ionic_lif_init(struct ionic_lif *lif) lif->hw_index = le16_to_cpu(comp.hw_index); mutex_init(&lif->queue_lock); + mutex_init(&lif->config_lock); /* now that we have the hw_index we can figure out our doorbell page */ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 346506f01715..af291303bd7a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -108,7 +108,6 @@ struct ionic_deferred_work { struct list_head list; enum ionic_deferred_work_type type; union { - unsigned int rx_mode; u8 addr[ETH_ALEN]; u8 fw_status; }; @@ -179,6 +178,7 @@ struct ionic_lif { unsigned int index; unsigned int hw_index; struct mutex queue_lock; /* lock for queue structures */ + struct mutex config_lock; /* lock for config actions */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; @@ -199,7 +199,7 @@ struct ionic_lif { unsigned int nrxq_descs; u32 rx_copybreak; u64 rxq_features; - unsigned int rx_mode; + u16 rx_mode; u64 hw_features; bool registered; bool mc_overflow; -- cgit v1.2.3 From f79eef711eb57d56874b08ea11db69221de54a6d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:46 -0700 Subject: ionic: catch no ptp support earlier If PTP configuration is attempted on ports that don't support it, such as VF ports, the driver will return an error status -95, or EOPNOSUPP and print an error message enp98s0: hwstamp set failed: -95 Because some daemons can retry every few seconds, this can end up filling the dmesg log and pushing out other more useful messages. We can catch this issue earlier in our handling and return the error without a log message. Fixes: 829600ce5e4e ("ionic: add ts_config replay") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.h | 7 ++----- drivers/net/ethernet/pensando/ionic/ionic_phc.c | 10 +++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index af291303bd7a..69ab59fedb6c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -int ionic_lif_hwstamp_replay(struct ionic_lif *lif); +void ionic_lif_hwstamp_replay(struct ionic_lif *lif); int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); @@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif); void ionic_lif_alloc_phc(struct ionic_lif *lif); void ionic_lif_free_phc(struct ionic_lif *lif); #else -static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) -{ - return -EOPNOTSUPP; -} +static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {} static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index a87c87e86aef..6e2403c71608 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) struct hwtstamp_config config; int err; + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) return 0; } -int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +void ionic_lif_hwstamp_replay(struct ionic_lif *lif) { int err; + if (!lif->phc || !lif->phc->ptp) + return; + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); if (err) netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); - - return err; } int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) -- cgit v1.2.3 From a6ff85e0a2d9d074a4b4c291ba9ec1e5b0aba22b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:47 -0700 Subject: ionic: remove intr coalesce update from napi Move the interrupt coalesce value update out of the napi thread and into the dim_work thread and set it only when it has actually changed. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 14 +++++++++++++- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7815e9034fb8..e795fa63ca12 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -53,7 +53,19 @@ static void ionic_dim_work(struct work_struct *work) cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); - qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; + new_coal = new_coal ? new_coal : 1; + + if (qcq->intr.dim_coal_hw != new_coal) { + unsigned int qi = qcq->cq.bound_q->index; + struct ionic_lif *lif = qcq->q.lif; + + qcq->intr.dim_coal_hw = new_coal; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[qi]->intr.index, + qcq->intr.dim_coal_hw); + } + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 08934888575c..9d3a04110685 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -463,10 +463,6 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; - ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); - dim_update_sample(qcq->cq.bound_intr->rearm_count, lif->txqstats[qi].pkts, lif->txqstats[qi].bytes, -- cgit v1.2.3 From 76ed8a4a00b484dcccef819ef2618bcf8e46f560 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:48 -0700 Subject: ionic: fix up dim accounting for tx and rx We need to count the correct Tx and/or Rx packets for dynamic interrupt moderation, depending on which we're processing on the queue interrupt. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 9d3a04110685..1c6e2b9fc96b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -451,11 +451,12 @@ void ionic_rx_empty(struct ionic_queue *q) q->tail_idx = 0; } -static void ionic_dim_update(struct ionic_qcq *qcq) +static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode) { struct dim_sample dim_sample; struct ionic_lif *lif; unsigned int qi; + u64 pkts, bytes; if (!qcq->intr.dim_coal_hw) return; @@ -463,10 +464,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; + switch (napi_mode) { + case IONIC_LIF_F_TX_DIM_INTR: + pkts = lif->txqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes; + break; + case IONIC_LIF_F_RX_DIM_INTR: + pkts = lif->rxqstats[qi].pkts; + bytes = lif->rxqstats[qi].bytes; + break; + default: + pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes; + break; + } + dim_update_sample(qcq->cq.bound_intr->rearm_count, - lif->txqstats[qi].pkts, - lif->txqstats[qi].bytes, - &dim_sample); + pkts, bytes, &dim_sample); net_dim(&qcq->dim, dim_sample); } @@ -487,7 +501,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) ionic_tx_service, NULL, NULL); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -526,7 +540,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -572,7 +586,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, 0); flags |= IONIC_INTR_CRED_UNMASK; rxcq->bound_intr->rearm_count++; } -- cgit v1.2.3 From f07f9815b7046e25cc32bf8542c9c0bbc5eb6e0e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:49 -0700 Subject: ionic: count csum_none when offload enabled Be sure to count the csum_none cases when csum offload is enabled. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 1c6e2b9fc96b..08870190e4d2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -274,12 +274,11 @@ static void ionic_rx_clean(struct ionic_queue *q, } } - if (likely(netdev->features & NETIF_F_RXCSUM)) { - if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = (__force __wsum)le16_to_cpu(comp->csum); - stats->csum_complete++; - } + if (likely(netdev->features & NETIF_F_RXCSUM) && + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = (__force __wsum)le16_to_cpu(comp->csum); + stats->csum_complete++; } else { stats->csum_none++; } -- cgit v1.2.3 From f5d156c7bfab7d728b2fd35bc63eab12eda18125 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:34:37 +0800 Subject: arm64: dts: imx8mp: remove fallback compatible string for FlexCAN FlexCAN on i.MX8MP is not derived from i.MX6Q, instead reuses from i.MX8QM with extra ECC added and default is enabled, so that the FlexCAN would be put into freeze mode without FLEXCAN_QUIRK_DISABLE_MECR quirk. This patch removes "fsl,imx6q-flexcan" fallback compatible string since it's not compatible with the i.MX6Q. Link: https://lore.kernel.org/r/20210719073437.32078-1-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Reviewed-by: Fabio Estevam Signed-off-by: Marc Kleine-Budde --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, -- cgit v1.2.3 From 54f93336d000229f72c26d8a3f69dd256b744528 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 22 Jul 2021 15:08:19 +0800 Subject: can: raw: raw_setsockopt(): fix raw_rcv panic for sock UAF We get a bug during ltp can_filter test as following. =========================================== [60919.264984] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [60919.265223] PGD 8000003dda726067 P4D 8000003dda726067 PUD 3dda727067 PMD 0 [60919.265443] Oops: 0000 [#1] SMP PTI [60919.265550] CPU: 30 PID: 3638365 Comm: can_filter Kdump: loaded Tainted: G W 4.19.90+ #1 [60919.266068] RIP: 0010:selinux_socket_sock_rcv_skb+0x3e/0x200 [60919.293289] RSP: 0018:ffff8d53bfc03cf8 EFLAGS: 00010246 [60919.307140] RAX: 0000000000000000 RBX: 000000000000001d RCX: 0000000000000007 [60919.320756] RDX: 0000000000000001 RSI: ffff8d5104a8ed00 RDI: ffff8d53bfc03d30 [60919.334319] RBP: ffff8d9338056800 R08: ffff8d53bfc29d80 R09: 0000000000000001 [60919.347969] R10: ffff8d53bfc03ec0 R11: ffffb8526ef47c98 R12: ffff8d53bfc03d30 [60919.350320] perf: interrupt took too long (3063 > 2500), lowering kernel.perf_event_max_sample_rate to 65000 [60919.361148] R13: 0000000000000001 R14: ffff8d53bcf90000 R15: 0000000000000000 [60919.361151] FS: 00007fb78b6b3600(0000) GS:ffff8d53bfc00000(0000) knlGS:0000000000000000 [60919.400812] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60919.413730] CR2: 0000000000000010 CR3: 0000003e3f784006 CR4: 00000000007606e0 [60919.426479] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [60919.439339] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [60919.451608] PKRU: 55555554 [60919.463622] Call Trace: [60919.475617] [60919.487122] ? update_load_avg+0x89/0x5d0 [60919.498478] ? update_load_avg+0x89/0x5d0 [60919.509822] ? account_entity_enqueue+0xc5/0xf0 [60919.520709] security_sock_rcv_skb+0x2a/0x40 [60919.531413] sk_filter_trim_cap+0x47/0x1b0 [60919.542178] ? kmem_cache_alloc+0x38/0x1b0 [60919.552444] sock_queue_rcv_skb+0x17/0x30 [60919.562477] raw_rcv+0x110/0x190 [can_raw] [60919.572539] can_rcv_filter+0xbc/0x1b0 [can] [60919.582173] can_receive+0x6b/0xb0 [can] [60919.591595] can_rcv+0x31/0x70 [can] [60919.600783] __netif_receive_skb_one_core+0x5a/0x80 [60919.609864] process_backlog+0x9b/0x150 [60919.618691] net_rx_action+0x156/0x400 [60919.627310] ? sched_clock_cpu+0xc/0xa0 [60919.635714] __do_softirq+0xe8/0x2e9 [60919.644161] do_softirq_own_stack+0x2a/0x40 [60919.652154] [60919.659899] do_softirq.part.17+0x4f/0x60 [60919.667475] __local_bh_enable_ip+0x60/0x70 [60919.675089] __dev_queue_xmit+0x539/0x920 [60919.682267] ? finish_wait+0x80/0x80 [60919.689218] ? finish_wait+0x80/0x80 [60919.695886] ? sock_alloc_send_pskb+0x211/0x230 [60919.702395] ? can_send+0xe5/0x1f0 [can] [60919.708882] can_send+0xe5/0x1f0 [can] [60919.715037] raw_sendmsg+0x16d/0x268 [can_raw] It's because raw_setsockopt() concurrently with unregister_netdevice_many(). Concurrent scenario as following. cpu0 cpu1 raw_bind raw_setsockopt unregister_netdevice_many unlist_netdevice dev_get_by_index raw_notifier raw_enable_filters ...... can_rx_register can_rcv_list_find(..., net->can.rx_alldev_list) ...... sock_close raw_release(sock_a) ...... can_receive can_rcv_filter(net->can.rx_alldev_list, ...) raw_rcv(skb, sock_a) BUG After unlist_netdevice(), dev_get_by_index() return NULL in raw_setsockopt(). Function raw_enable_filters() will add sock and can_filter to net->can.rx_alldev_list. Then the sock is closed. Followed by, we sock_sendmsg() to a new vcan device use the same can_filter. Protocol stack match the old receiver whose sock has been released on net->can.rx_alldev_list in can_rcv_filter(). Function raw_rcv() uses the freed sock. UAF BUG is triggered. We can find that the key issue is that net_device has not been protected in raw_setsockopt(). Use rtnl_lock to protect net_device in raw_setsockopt(). Fixes: c18ce101f2e4 ("[CAN]: Add raw protocol") Link: https://lore.kernel.org/r/20210722070819.1048263-1-william.xuanziyang@huawei.com Cc: linux-stable Signed-off-by: Ziyang Xuan Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index ed4fcb7ab0c3..cd5a49380116 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + if (count > 1) + kfree(filter); + err = -ENODEV; + goto out_fil; + } + } if (ro->bound) { /* (try to) register the new filters */ @@ -588,6 +596,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; @@ -600,10 +609,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, err_mask &= CAN_ERR_MASK; + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + } /* remove current error mask */ if (ro->bound) { @@ -627,6 +642,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; -- cgit v1.2.3 From 0c71437dd50dd687c15d8ca80b3b68f10bb21d63 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 14 Jul 2021 13:16:02 +0200 Subject: can: j1939: j1939_session_deactivate(): clarify lifetime of session object The j1939_session_deactivate() is decrementing the session ref-count and potentially can free() the session. This would cause use-after-free situation. However, the code calling j1939_session_deactivate() does always hold another reference to the session, so that it would not be free()ed in this code path. This patch adds a comment to make this clear and a WARN_ON, to ensure that future changes will not violate this requirement. Further this patch avoids dereferencing the session pointer as a precaution to avoid use-after-free if the session is actually free()ed. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/20210714111602.24021-1-o.rempel@pengutronix.de Reported-by: Xiaochen Zou Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index c3946c355882..bb1092c3e7e3 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1075,11 +1075,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session) static bool j1939_session_deactivate(struct j1939_session *session) { + struct j1939_priv *priv = session->priv; bool active; - j1939_session_list_lock(session->priv); + j1939_session_list_lock(priv); + /* This function should be called with a session ref-count of at + * least 2. + */ + WARN_ON_ONCE(kref_read(&session->kref) < 2); active = j1939_session_deactivate_locked(session); - j1939_session_list_unlock(session->priv); + j1939_session_list_unlock(priv); return active; } -- cgit v1.2.3 From c6eea1c8bda56737752465a298dc6ce07d6b8ce3 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 6 Jul 2021 19:00:08 +0800 Subject: can: j1939: j1939_xtp_rx_dat_one(): fix rxtimer value between consecutive TP.DT to 750ms For receive side, the max time interval between two consecutive TP.DT should be 750ms. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/1625569210-47506-1-git-send-email-zhangchangzhong@huawei.com Cc: linux-stable Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bb1092c3e7e3..bdc95bd7a851 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1874,7 +1874,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (!session->transmission) j1939_tp_schedule_txtimer(session, 0); } else { - j1939_tp_set_rxtimeout(session, 250); + j1939_tp_set_rxtimeout(session, 750); } session->last_cmd = 0xff; consume_skb(se_skb); -- cgit v1.2.3 From 590eb2b7d8cfafb27e8108d52d4bf4850626d31d Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 25 Jun 2021 15:09:29 +0200 Subject: can: peak_usb: pcan_usb_handle_bus_evt(): fix reading rxerr/txerr values This patch fixes an incorrect way of reading error counters in messages received for this purpose from the PCAN-USB interface. These messages inform about the increase or decrease of the error counters, whose values are placed in bytes 1 and 2 of the message data (not 0 and 1). Fixes: ea8b33bde76c ("can: pcan_usb: add support of rxerr/txerr counters") Link: https://lore.kernel.org/r/20210625130931.27438-4-s.grosjean@peak-system.com Cc: linux-stable Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 1d6f77252f01..899a3d21b77f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -117,7 +117,8 @@ #define PCAN_USB_BERR_MASK (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR) /* identify bus event packets with rx/tx error counters */ -#define PCAN_USB_ERR_CNT 0x80 +#define PCAN_USB_ERR_CNT_DEC 0x00 /* counters are decreasing */ +#define PCAN_USB_ERR_CNT_INC 0x80 /* counters are increasing */ /* private to PCAN-USB adapter */ struct pcan_usb { @@ -608,11 +609,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir) /* acccording to the content of the packet */ switch (ir) { - case PCAN_USB_ERR_CNT: + case PCAN_USB_ERR_CNT_DEC: + case PCAN_USB_ERR_CNT_INC: /* save rx/tx error counters from in the device context */ - pdev->bec.rxerr = mc->ptr[0]; - pdev->bec.txerr = mc->ptr[1]; + pdev->bec.rxerr = mc->ptr[1]; + pdev->bec.txerr = mc->ptr[2]; break; default: -- cgit v1.2.3 From ef68a717960658e6a1e5f08adb0574326e9a12c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 24 Apr 2021 16:20:39 +0200 Subject: can: mcp251xfd: mcp251xfd_irq(): stop timestamping worker in case error in IRQ In case an error occurred in the IRQ handler, the chip status is dumped via devcoredump and all IRQs are disabled, but the chip stays powered for further analysis. The chip is in an undefined state and will not receive any CAN frames, so shut down the timestamping worker, which reads the TBC register regularly, too. This avoids any CRC read error messages if there is a communication problem with the chip. Fixes: efd8d98dfb90 ("can: mcp251xfd: add HW timestamp infrastructure") Link: https://lore.kernel.org/r/20210724155131.471303-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 47c3f408a799..9ae48072b6c6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2300,6 +2300,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err, priv->regs_status.intf); mcp251xfd_dump(priv); mcp251xfd_chip_interrupts_disable(priv); + mcp251xfd_timestamp_stop(priv); return handled; } -- cgit v1.2.3 From 3cf4375a090473d240281a0d2b04a3a5aaeac34b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 23 Jul 2021 18:46:01 -0400 Subject: tipc: do not write skb_shinfo frags when doing decrytion One skb's skb_shinfo frags are not writable, and they can be shared with other skbs' like by pskb_copy(). To write the frags may cause other skb's data crash. So before doing en/decryption, skb_cow_data() should always be called for a cloned or nonlinear skb if req dst is using the same sg as req src. While at it, the likely branch can be removed, as it will be covered by skb_cow_data(). Note that esp_input() has the same issue, and I will fix it in another patch. tipc_aead_encrypt() doesn't have this issue, as it only processes linear data in the unlikely branch. Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/crypto.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index e5c43d4d5a75..c9391d38de85 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, if (unlikely(!aead)) return -ENOKEY; - /* Cow skb data if needed */ - if (likely(!skb_cloned(skb) && - (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) { - nsg = 1 + skb_shinfo(skb)->nr_frags; - } else { - nsg = skb_cow_data(skb, 0, &unused); - if (unlikely(nsg < 0)) { - pr_err("RX: skb_cow_data() returned %d\n", nsg); - return nsg; - } + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; } /* Allocate memory for the AEAD operation */ -- cgit v1.2.3 From 89bc7f456cd40e0be7b94f4fdae9186f22b76a05 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 23 Jul 2021 17:53:48 -0400 Subject: bnxt_en: Add missing periodic PHC overflow check We use the timecounter APIs for the 48-bit PHC and packet timestamps. We must periodically update the timecounter at roughly half the overflow interval. The overflow interval is about 78 hours, so update it every 19 hours (1/4 interval) for some extra margins. Fixes: 390862f45c85 ("bnxt_en: Get the full 48-bit hardware timestamp periodically") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 7 +++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 9089e7f3fbd4..ec381c2423b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -353,6 +353,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; + if (time_after_eq(now, ptp->next_overflow_check)) { + spin_lock_bh(&ptp->ptp_lock); + timecounter_read(&ptp->tc); + spin_unlock_bh(&ptp->ptp_lock); + ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; + } return HZ; } @@ -423,6 +429,7 @@ int bnxt_ptp_init(struct bnxt *bp) ptp->cc.shift = 0; ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); ptp->ptp_info = bnxt_ptp_caps; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 4135ea3ec788..254ba7bc0f99 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -32,6 +32,10 @@ struct bnxt_ptp_cfg { u64 current_time; u64 old_time; unsigned long next_period; + unsigned long next_overflow_check; + /* 48-bit PHC overflows in 78 hours. Check overflow every 19 hours. */ + #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) + u16 tx_seqid; struct bnxt *bp; atomic_t tx_avail; -- cgit v1.2.3 From ac059d16442f30e6a9a95d41655153e01247e710 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:28:24 +0530 Subject: octeontx2-af: Fix PKIND overlap between LBK and LMAC interfaces Currently PKINDs are not assigned to LBK channels. The default value of LBK_CHX_PKIND (channel to PKIND mapping) register is zero, which is resulting in a overlap of pkind between LBK and CGX LMACs. When KPU1 parser config is modified when PTP timestamping is enabled on the CGX LMAC interface it is impacting traffic on LBK interfaces as well. This patch fixes the issue by reserving the PKIND#0 for LBK devices. CGX mapped PF pkind starts from 1 and also fixes the max pkind available. Fixes: 421572175ba5 ("octeontx2-af: Support to enable/disable HW timestamping") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/npc.h | 3 +++ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 11 +++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 19bad9a59c8f..243cf8070e77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype { * Software assigns pkind for each incoming port such as CGX * Ethernet interfaces, LBK interfaces, etc. */ +#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND + enum npc_pkind_type { + NPC_RX_LBK_PKIND = 0ULL, NPC_RX_VLAN_EXDSA_PKIND = 56ULL, NPC_RX_CHLEN24B_PKIND = 57ULL, NPC_RX_CPT_HDR_PKIND, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0d2cd5169018..30067668eda7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -298,6 +298,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) rvu_nix_chan_lbk(rvu, lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; + rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf); rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 1097291aaa45..52b255426c22 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; int num_pkinds, num_kpus, idx; - struct npc_pkind *pkind; /* Disable all KPUs and their entries */ for (idx = 0; idx < hw->npc_kpus; idx++) { @@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) * Check HW max count to avoid configuring junk or * writing to unsupported CSR addresses. */ - pkind = &hw->pkind; num_pkinds = rvu->kpu.pkinds; - num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds); + num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds); for (idx = 0; idx < num_pkinds; idx++) npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true); @@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) if (npc_const1 & BIT_ULL(63)) npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); - pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL; + pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT; + hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL; hw->npc_kpu_entries = npc_const1 & 0xFFFULL; hw->npc_kpus = (npc_const >> 8) & 0x1FULL; hw->npc_intfs = npc_const & 0xFULL; @@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu) err = rvu_alloc_bitmap(&pkind->rsrc); if (err) return err; + /* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0', + * no need to configure PKIND for all LBKs separately. + */ + rvu_alloc_rsrc(&pkind->rsrc); /* Allocate mem for pkind to PF and channel mapping info */ pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max, -- cgit v1.2.3 From 69f0aeb13bb548e2d5710a350116e03f0273302e Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:29:03 +0530 Subject: octeontx2-pf: Fix interface down flag on error In the existing code while changing the number of TX/RX queues using ethtool the PF/VF interface resources are freed and reallocated (otx2_stop and otx2_open is called) if the device is in running state. If any resource allocation fails in otx2_open, driver free already allocated resources and return. But again, when the number of queues changes as the device state still running oxt2_stop is called. In which we try to free already freed resources leading to driver crash. This patch fixes the issue by setting the INTF_DOWN flag on error and free the resources in otx2_stop only if the flag is not set. Fixes: 50fe6c02e5ad ("octeontx2-pf: Register and handle link notifications") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 7 +++---- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 8df748e0677b..b906a0eb6e0d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev, err = otx2_set_real_num_queues(dev, channel->tx_count, channel->rx_count); if (err) - goto fail; + return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; -fail: if (if_up) - dev->netdev_ops->ndo_open(dev); + err = dev->netdev_ops->ndo_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", pfvf->hw.tx_queues, pfvf->hw.rx_queues); @@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->rqe_cnt = rx_count; if (if_up) - netdev->netdev_ops->ndo_open(netdev); + return netdev->netdev_ops->ndo_open(netdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index f300b807a85b..2c24944a4dba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev) err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + pf->flags |= OTX2_FLAG_INTF_DOWN; err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, @@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev) struct otx2_rss_info *rss; int qidx, vec, wrk; + /* If the DOWN flag is set resources are already freed */ + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return 0; + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); -- cgit v1.2.3 From 4c85e57575fb9e6405d02d55aef8025c60abb824 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 25 Jul 2021 13:29:37 +0530 Subject: octeontx2-pf: Dont enable backpressure on LBK links Avoid configure backpressure for LBK links as they don't support it and enable lmacs before configuration pause frames. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool") Signed-off-by: Geetha sowjanya Signed-off-by: Hariprasad Kelam Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 9169849881bf..544c96c8fe1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx) /* Add reference */ cgx->lmac_idmap[lmac->lmac_id] = lmac; - cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); set_bit(lmac->lmac_id, &cgx->lmac_bmap); + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); } return cgx_lmac_verify_fwi_version(cgx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 7cccd802c4ed..70fcc1fd962f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -924,12 +924,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt); aq->cq.drop_ena = 1; - /* Enable receive CQ backpressure */ - aq->cq.bp_ena = 1; - aq->cq.bpid = pfvf->bpid[0]; + if (!is_otx2_lbkvf(pfvf->pdev)) { + /* Enable receive CQ backpressure */ + aq->cq.bp_ena = 1; + aq->cq.bpid = pfvf->bpid[0]; - /* Set backpressure level is same as cq pass level */ - aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + /* Set backpressure level is same as cq pass level */ + aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + } } /* Fill AQ info */ @@ -1186,7 +1188,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ /* Enable backpressure for RQ aura */ - if (aura_id < pfvf->hw.rqpool_cnt) { + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { aq->aura.bp_ena = 0; aq->aura.nix0_bpid = pfvf->bpid[0]; /* Set backpressure level for RQ's Aura */ -- cgit v1.2.3 From 149ea30fdd5c28b89a3bfdecfc75cdab1deddb14 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 23 Jul 2021 17:56:00 +0300 Subject: devlink: Fix phys_port_name of virtual port and merge error Merge commit cited in fixes tag was incorrect. Due to it phys_port_name of the virtual port resulted in incorrect name. Also the phys_port_name of the physical port was written twice due to the merge error. Fix it by removing the old code and inserting back the misplaced code. Related commits of interest in net and net-next branches that resulted in merge conflict are: in net-next branch: commit f285f37cb1e6 ("devlink: append split port number to the port name") in net branch: commit b28d8f0c25a9 ("devlink: Correct VIRTUAL port to not have phys_port attributes") Fixes: 126285651b7 ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Parav Pandit Reported-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: David S. Miller --- net/core/devlink.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fdd04f00fd7..85032626de24 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: n = snprintf(name, len, "p%u", attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); - if (!attrs->split) - n = snprintf(name, len, "p%u", attrs->phys.port_number); - else - n = snprintf(name, len, "p%us%u", - attrs->phys.port_number, - attrs->phys.split_subport_number); - break; case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) -- cgit v1.2.3 From ad4e1e48a6291f7fb53fbef38ca264966ffd65c9 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Fri, 23 Jul 2021 21:59:27 +0800 Subject: net: phy: broadcom: re-add check for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54811 PHY Restore PHY_ID_BCM54811 accidently removed by commit 5d4358ede8eb. Fixes: 5d4358ede8eb ("net: phy: broadcom: Allow BCM54210E to configure APD") Signed-off-by: Kevin Lo Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7bf3011b8e77..83aea5c5cd03 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) { if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 || - BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E) + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811) val |= BCM54XX_SHD_SCR3_RXCTXC_DIS; else val |= BCM54XX_SHD_SCR3_TRDDAPD; -- cgit v1.2.3 From 7e4960b3d66d7248b23de3251118147812b42da2 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 23 Jul 2021 18:36:09 +0800 Subject: mlx4: Fix missing error code in mlx4_load_one() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'err'. Eliminate the follow smatch warning: drivers/net/ethernet/mellanox/mlx4/main.c:3538 mlx4_load_one() warn: missing error code 'err'. Reported-by: Abaci Robot Fixes: 7ae0e400cd93 ("net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs") Signed-off-by: Jiapeng Chong Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 00c84656b2e7..28ac4693da3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3535,6 +3535,7 @@ slave_start: if (!SRIOV_VALID_STATE(dev->flags)) { mlx4_err(dev, "Invalid SRIOV state\n"); + err = -EINVAL; goto err_close; } } -- cgit v1.2.3 From 795e3d2ea68e489ee7039ac29e98bfea0e34a96c Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Sun, 25 Jul 2021 23:28:04 +0530 Subject: net: qede: Fix end of loop tests for list_for_each_entry The list_for_each_entry() iterator, "vlan" in this code, can never be NULL so the warning will never be printed. Signed-off-by: Harshvardhan Jha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index c59b72c90293..a2e4dfb5cb44 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev) int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qede_dev *edev = netdev_priv(dev); - struct qede_vlan *vlan = NULL; + struct qede_vlan *vlan; int rc = 0; DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid); @@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) if (vlan->vid == vid) break; - if (!vlan || (vlan->vid != vid)) { + if (list_entry_is_head(vlan, &edev->vlan_list, list)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "Vlan isn't configured\n"); goto out; -- cgit v1.2.3 From 058e6e0ed0eace43401c945082dec1d669b5b231 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:50 -0400 Subject: sctp: improve the code for pmtu probe send and recv update This patch does 3 things: - make sctp_transport_pl_send() and sctp_transport_pl_recv() return bool type to decide if more probe is needed to send. - pr_debug() only when probe is really needed to send. - count pl.raise_count in sctp_transport_pl_send() instead of sctp_transport_pl_recv(), and it's only incremented for the 1st probe for the same size. These are preparations for the next patch to make probes happen only when there's packet loss in Search Complete state. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/sm_statefuns.c | 15 +++++++-------- net/sctp/transport.c | 41 +++++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 32fc4a309df5..f3d414ed208e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1024,8 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -void sctp_transport_pl_send(struct sctp_transport *t); -void sctp_transport_pl_recv(struct sctp_transport *t); +bool sctp_transport_pl_send(struct sctp_transport *t); +bool sctp_transport_pl_recv(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 09a8f23ec709..32df65f68c12 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net, if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; - sctp_transport_pl_send(transport); - - reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); - if (!reply) - return SCTP_DISPOSITION_NOMEM; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + if (sctp_transport_pl_send(transport)) { + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + } sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; - sctp_transport_pl_recv(link); - if (link->pl.state == SCTP_PL_COMPLETE) + if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 397a6244dd97..23e7bd3e3bd4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -258,16 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) sctp_transport_pl_update(transport); } -void sctp_transport_pl_send(struct sctp_transport *t) +bool sctp_transport_pl_send(struct sctp_transport *t) { - pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", - __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); - - if (t->pl.probe_count < SCTP_MAX_PROBES) { - t->pl.probe_count++; - return; - } + if (t->pl.probe_count < SCTP_MAX_PROBES) + goto out; + t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ @@ -299,10 +295,20 @@ void sctp_transport_pl_send(struct sctp_transport *t) sctp_assoc_sync_pmtu(t->asoc); } } - t->pl.probe_count = 1; + +out: + if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && + !t->pl.probe_count) + t->pl.raise_count++; + + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.probe_count++; + return true; } -void sctp_transport_pl_recv(struct sctp_transport *t) +bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -323,7 +329,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t) if (!t->pl.probe_high) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); - return; + return false; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { @@ -335,14 +341,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE) { - t->pl.raise_count++; - if (t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; - } + } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; } + + return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) -- cgit v1.2.3 From eacf078cf4c7aa23e9591738511f142cc39b5186 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:51 -0400 Subject: sctp: send pmtu probe only if packet loss in Search Complete state This patch is to introduce last_rtx_chunks into sctp_transport to detect if there's any packet retransmission/loss happened by checking against asoc's rtx_data_chunks in sctp_transport_pl_send(). If there is, namely, transport->last_rtx_chunks != asoc->rtx_data_chunks, the pmtu probe will be sent out. Otherwise, increment the pl.raise_count and return when it's in Search Complete state. With this patch, if in Search Complete state, which is a long period, it doesn't need to keep probing the current pmtu unless there's data packet loss. This will save quite some traffic. v1->v2: - add the missing Fixes tag. Fixes: 0dac127c0557 ("sctp: do black hole detection in search complete state") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/transport.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f3d414ed208e..651bba654d77 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,6 +984,7 @@ struct sctp_transport { } cacc; struct { + __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 23e7bd3e3bd4..a3d3ca6dd63d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,6 +263,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t) if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ @@ -298,8 +299,10 @@ bool sctp_transport_pl_send(struct sctp_transport *t) out: if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && - !t->pl.probe_count) + !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) { t->pl.raise_count++; + return false; + } pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -313,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t) pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { -- cgit v1.2.3 From 94cbe7db7d757c2d481c3617ab5579a28cfc2175 Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Mon, 26 Jul 2021 10:20:20 +0800 Subject: net: stmmac: add est_irq_status callback function for GMAC 4.10 and 5.10 Assign dwmac5_est_irq_status to est_irq_status callback function for GMAC 4.10 and 5.10. With this, EST related interrupts could be handled properly. Fixes: e49aa315cb01 ("net: stmmac: EST interrupts handling and error reporting") Cc: # 5.13.x Signed-off-by: Mohammad Athari Bin Ismail Acked-by: Wong Vee Khee Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 67ba083eb90c..b21745368983 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, @@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, -- cgit v1.2.3 From 2ebda027148315581b89a2ed2fef84ad53b2aedd Mon Sep 17 00:00:00 2001 From: Chen Shen Date: Mon, 26 Jul 2021 13:47:34 +0800 Subject: sctp: delete addr based on sin6_scope_id sctp_inet6addr_event deletes 'addr' from 'local_addr_list' when setting netdev down, but it is possible to delete the incorrect entry (match the first one with the same ipaddr, but the different 'ifindex'), if there are some netdevs with the same 'local-link' ipaddr added already. It should delete the entry depending on 'sin6_addr' and 'sin6_scope_id' both. otherwise, the endpoint will call 'sctp_sf_ootb' if it can't find the according association when receives 'heartbeat', and finally will reply 'abort'. For example: 1.when linux startup the entries in local_addr_list: ifindex:35 addr:fe80::40:43ff:fe80:0 (eths0.201) ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.209 local fe80::40:43ff:fe80:0 dev eths0.210 2.after 'ifconfig eths0.209 down' the entries in local_addr_list: ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.210 3.asoc not found for src:[fe80::40:43ff:fe80:0]:37381 dst:[:1]:53335 ::1->fe80::40:43ff:fe80:0 HEARTBEAT fe80::40:43ff:fe80:0->::1 ABORT Signed-off-by: Chen Shen Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e48dd909dee5..470dbdc27d58 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && - ipv6_addr_equal(&addr->a.v6.sin6_addr, - &ifa->addr)) { + ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr) && + addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; -- cgit v1.2.3 From 92766c4628ea349c8ddab0cd7bd0488f36e5c4ce Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sun, 25 Jul 2021 21:45:12 +0800 Subject: net/qla3xxx: fix schedule while atomic in ql_wait_for_drvr_lock and ql_adapter_reset When calling the 'ql_wait_for_drvr_lock' and 'ql_adapter_reset', the driver has already acquired the spin lock, so the driver should not call 'ssleep' in atomic context. This bug can be fixed by using 'mdelay' instead of 'ssleep'. Reported-by: Letu Ren Signed-off-by: Letu Ren Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2376b2729633..c00ad57575ea 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) "driver lock acquired\n"); return 1; } - ssleep(1); + mdelay(1000); } while (++i < 10); netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n"); @@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) if ((value & ISP_CONTROL_SR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); /* @@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) ispControlStatus); if ((value & ISP_CONTROL_FSR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); } if (max_wait_time == 0) -- cgit v1.2.3 From 758684e49f4c7ea2a75e249e486659f0950cd63e Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 26 Jul 2021 14:52:48 -0400 Subject: bnxt_en: Fix static checker warning in bnxt_fw_reset_task() Now that we return when bnxt_open() fails in bnxt_fw_reset_task(), there is no need to check for 'rc' value again before invoking bnxt_reenable_sriov(). Fixes: 3958b1da725a ("bnxt_en: fix error path of FW reset") Reported-by: Dan Carpenter Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4db162cee911..89606587b156 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12131,9 +12131,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_ulp_start(bp, rc); - if (!rc) - bnxt_reenable_sriov(bp); + bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); bnxt_dl_health_recovery_done(bp); -- cgit v1.2.3 From fcef709c2c4baf758950bd7395e4b10527b81e2c Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Sun, 25 Jul 2021 18:54:52 +0530 Subject: octeontx2-af: Do NIX_RX_SW_SYNC twice NIX_RX_SW_SYNC ensures all existing transactions are finished and pkts are written to LLC/DRAM, queues should be teared down after successful SW_SYNC. Due to a HW errata, in some rare scenarios an existing transaction might end after SW_SYNC operation. To ensure operation is fully done, do the SW_SYNC twice. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 30067668eda7..4bfbbdf38770 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -196,11 +196,22 @@ static void nix_rx_sync(struct rvu *rvu, int blkaddr) { int err; - /*Sync all in flight RX packets to LLC/DRAM */ + /* Sync all in flight RX packets to LLC/DRAM */ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); if (err) - dev_err(rvu->dev, "NIX RX software sync failed\n"); + dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n"); + + /* SW_SYNC ensures all existing transactions are finished and pkts + * are written to LLC/DRAM, queues should be teared down after + * successful SW_SYNC. Due to a HW errata, in some rare scenarios + * an existing transaction might end after SW_SYNC operation. To + * ensure operation is fully done, do the SW_SYNC twice. + */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); + err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); + if (err) + dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n"); } static bool is_valid_txschq(struct rvu *rvu, int blkaddr, -- cgit v1.2.3 From c7c9d2102c9c098916ab9e0ab248006107d00d6c Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 00:11:59 +0300 Subject: net: llc: fix skb_over_panic Syzbot reported skb_over_panic() in llc_pdu_init_as_xid_cmd(). The problem was in wrong LCC header manipulations. Syzbot's reproducer tries to send XID packet. llc_ui_sendmsg() is doing following steps: 1. skb allocation with size = len + header size len is passed from userpace and header size is 3 since addr->sllc_xid is set. 2. skb_reserve() for header_len = 3 3. filling all other space with memcpy_from_msg() Ok, at this moment we have fully loaded skb, only headers needs to be filled. Then code comes to llc_sap_action_send_xid_c(). This function pushes 3 bytes for LLC PDU header and initializes it. Then comes llc_pdu_init_as_xid_cmd(). It initalizes next 3 bytes *AFTER* LLC PDU header and call skb_push(skb, 3). This looks wrong for 2 reasons: 1. Bytes rigth after LLC header are user data, so this function was overwriting payload. 2. skb_push(skb, 3) call can cause skb_over_panic() since all free space was filled in llc_ui_sendmsg(). (This can happen is user passed 686 len: 686 + 14 (eth header) + 3 (LLC header) = 703. SKB_DATA_ALIGN(703) = 704) So, in this patch I added 2 new private constansts: LLC_PDU_TYPE_U_XID and LLC_PDU_LEN_U_XID. LLC_PDU_LEN_U_XID is used to correctly reserve header size to handle LLC + XID case. LLC_PDU_TYPE_U_XID is used by llc_pdu_header_init() function to push 6 bytes instead of 3. And finally I removed skb_push() call from llc_pdu_init_as_xid_cmd(). This changes should not affect other parts of LLC, since after all steps we just transmit buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+5e5a981ad7cc54c4b2b4@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 31 +++++++++++++++++++++++-------- net/llc/af_llc.c | 10 +++++++++- net/llc/llc_s_ac.c | 2 +- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7180979114e4..ac5cadd02cfa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; - if (addr->sllc_test || addr->sllc_xid) + if (addr->sllc_test) rc = LLC_PDU_LEN_U; + else if (addr->sllc_xid) + /* We need to expand header to sizeof(struct llc_xid_info) + * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header + * as XID PDU. In llc_ui_sendmsg() we reserved header size and then + * filled all other space with user data. If we won't reserve this + * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data + */ + rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index b554f26c68ee..79d1cef8f15a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; - llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, + llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); -- cgit v1.2.3 From 801e541c79bbc63af852ca21b713ba87cc97c6ad Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 27 Jul 2021 20:25:06 +0800 Subject: nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index eb5d7a5beac7..1340fab9565e 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%d)\n", ret); + "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); goto out; } -- cgit v1.2.3 From 8373cd38a8888549ace7c7617163a2e826970a92 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 27 Jul 2021 22:03:50 +0800 Subject: net: hns3: change the method of obtaining default ptp cycle The ptp cycle is related to the hardware, so it may cause compatibility issues if a fixed value is used in driver. Therefore, the method of obtaining this value is changed to read from the register rather than use a fixed value in driver. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 36 +++++++++++++++++----- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h | 10 ++++-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 3b1f84502e36..befa9bcc2f2f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -5,9 +5,27 @@ #include "hclge_main.h" #include "hnae3.h" +static int hclge_ptp_get_cycle(struct hclge_dev *hdev) +{ + struct hclge_ptp *ptp = hdev->ptp; + + ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) & + HCLGE_PTP_CYCLE_QUO_MASK; + ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); + ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + + if (ptp->cycle.den == 0) { + dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n"); + return -EINVAL; + } + + return 0; +} + static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp); + struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle; u64 adj_val, adj_base, diff; unsigned long flags; bool is_neg = false; @@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) is_neg = true; } - adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT; + adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer; adj_val = adj_base * ppb; diff = div_u64(adj_val, 1000000000ULL); @@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /* This clock cycle is defined by three part: quotient, numerator * and denominator. For example, 2.5ns, the quotient is 2, - * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator - * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT. + * denominator is fixed to ptp->cycle.den, and numerator + * is 0.5 * ptp->cycle.den. */ - quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator); + quo = div_u64_rem(adj_val, cycle->den, &numerator); spin_lock_irqsave(&hdev->ptp->lock, flags); - writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); + writel(quo & HCLGE_PTP_CYCLE_QUO_MASK, + hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); - writel(HCLGE_PTP_CYCLE_ADJ_UNIT, - hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); writel(HCLGE_PTP_CYCLE_ADJ_EN, hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG); spin_unlock_irqrestore(&hdev->ptp->lock, flags); @@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_create_clock(hdev); if (ret) return ret; + + ret = hclge_ptp_get_cycle(hdev); + if (ret) + return ret; } ret = hclge_ptp_int_en(hdev, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index 5a202b775471..dbf5f4c08019 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -29,6 +29,7 @@ #define HCLGE_PTP_TIME_ADJ_REG 0x60 #define HCLGE_PTP_TIME_ADJ_EN BIT(0) #define HCLGE_PTP_CYCLE_QUO_REG 0x64 +#define HCLGE_PTP_CYCLE_QUO_MASK GENMASK(7, 0) #define HCLGE_PTP_CYCLE_DEN_REG 0x68 #define HCLGE_PTP_CYCLE_NUM_REG 0x6C #define HCLGE_PTP_CYCLE_CFG_REG 0x70 @@ -37,9 +38,7 @@ #define HCLGE_PTP_CUR_TIME_SEC_L_REG 0x78 #define HCLGE_PTP_CUR_TIME_NSEC_REG 0x7C -#define HCLGE_PTP_CYCLE_ADJ_BASE 2 #define HCLGE_PTP_CYCLE_ADJ_MAX 500000000 -#define HCLGE_PTP_CYCLE_ADJ_UNIT 100000000 #define HCLGE_PTP_SEC_H_OFFSET 32u #define HCLGE_PTP_SEC_L_MASK GENMASK(31, 0) @@ -47,6 +46,12 @@ #define HCLGE_PTP_FLAG_TX_EN 1 #define HCLGE_PTP_FLAG_RX_EN 2 +struct hclge_ptp_cycle { + u32 quo; + u32 numer; + u32 den; +}; + struct hclge_ptp { struct hclge_dev *hdev; struct ptp_clock *clock; @@ -58,6 +63,7 @@ struct hclge_ptp { spinlock_t lock; /* protects ptp registers */ u32 ptp_cfg; u32 last_tx_seqid; + struct hclge_ptp_cycle cycle; unsigned long tx_start; unsigned long tx_cnt; unsigned long tx_skipped; -- cgit v1.2.3 From 343597d558e79fe704ba8846b5b2ed24056b89c2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:58 -0700 Subject: bpf, sockmap: Zap ingress queues after stopping strparser We don't want strparser to run and pass skbs into skmsg handlers when the psock is null. We just sk_drop them in this case. When removing a live socket from map it means extra drops that we do not need to incur. Move the zap below strparser close to avoid this condition. This way we stop the stream parser first stopping it from processing packets and then delete the psock. Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-2-john.fastabend@gmail.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 15d71288e741..28115ef742e8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -773,8 +773,6 @@ static void sk_psock_destroy(struct work_struct *work) void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_stop(psock, false); - write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); @@ -784,6 +782,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); + sk_psock_stop(psock, false); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } -- cgit v1.2.3 From 476d98018f32e68e7c5d4e8456940cf2b6d66f10 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:59 -0700 Subject: bpf, sockmap: On cleanup we additionally need to remove cached skb Its possible if a socket is closed and the receive thread is under memory pressure it may have cached a skb. We need to ensure these skbs are free'd along with the normal ingress_skb queue. Before 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") tear down and backlog processing both had sock_lock for the common case of socket close or unhash. So it was not possible to have both running in parrallel so all we would need is the kfree in those kernels. But, latest kernels include the commit 799aa7f98d5e and this requires a bit more work. Without the ingress_lock guarding reading/writing the state->skb case its possible the tear down could run before the state update causing it to leak memory or worse when the backlog reads the state it could potentially run interleaved with the tear down and we might end up free'ing the state->skb from tear down side but already have the reference from backlog side. To resolve such races we wrap accesses in ingress_lock on both sides serializing tear down and backlog case. In both cases this only happens after an EAGAIN error case so having an extra lock in place is likely fine. The normal path will skip the locks. Note, we check state->skb before grabbing lock. This works because we can only enqueue with the mutex we hold already. Avoiding a race on adding state->skb after the check. And if tear down path is running that is also fine if the tear down path then removes state->skb we will simply set skb=NULL and the subsequent goto is skipped. This slight complication avoids locking in normal case. With this fix we no longer see this warning splat from tcp side on socket close when we hit the above case with redirect to ingress self. [224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220 [224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi [224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G I 5.14.0-rc1alu+ #181 [224913.935908] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220 [224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41 [224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206 [224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000 [224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460 [224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543 [224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390 [224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500 [224913.935974] FS: 00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000 [224913.935981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0 [224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [224913.936007] Call Trace: [224913.936016] inet_csk_destroy_sock+0xba/0x1f0 [224913.936033] __tcp_close+0x620/0x790 [224913.936047] tcp_close+0x20/0x80 [224913.936056] inet_release+0x8f/0xf0 [224913.936070] __sock_release+0x72/0x120 [224913.936083] sock_close+0x14/0x20 Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-3-john.fastabend@gmail.com --- net/core/skmsg.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 28115ef742e8..036cdb33a94a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -590,23 +590,42 @@ static void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +static void sk_psock_skb_state(struct sk_psock *psock, + struct sk_psock_work_state *state, + struct sk_buff *skb, + int len, int off) +{ + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + state->skb = skb; + state->len = len; + state->off = off; + } else { + sock_drop(psock->sk, skb); + } + spin_unlock_bh(&psock->ingress_lock); +} + static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; - struct sk_buff *skb; + struct sk_buff *skb = NULL; bool ingress; u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (state->skb) { + if (unlikely(state->skb)) { + spin_lock_bh(&psock->ingress_lock); skb = state->skb; len = state->len; off = state->off; state->skb = NULL; - goto start; + spin_unlock_bh(&psock->ingress_lock); } + if (skb) + goto start; while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; @@ -621,9 +640,8 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - state->skb = skb; - state->len = len; - state->off = off; + sk_psock_skb_state(psock, state, skb, + len, off); goto end; } /* Hard errors break pipe and stop xmit. */ @@ -722,6 +740,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } + kfree_skb(psock->work_state.skb); + /* We null the skb here to ensure that calls to sk_psock_backlog + * do not pick up the free'd skb. + */ + psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } -- cgit v1.2.3 From 9635720b7c88592214562cb72605bdab6708006c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:05:00 -0700 Subject: bpf, sockmap: Fix memleak on ingress msg enqueue If backlog handler is running during a tear down operation we may enqueue data on the ingress msg queue while tear down is trying to free it. sk_psock_backlog() sk_psock_handle_skb() skb_psock_skb_ingress() sk_psock_skb_ingress_enqueue() sk_psock_queue_msg(psock,msg) spin_lock(ingress_lock) sk_psock_zap_ingress() _sk_psock_purge_ingerss_msg() _sk_psock_purge_ingress_msg() -- free ingress_msg list -- spin_unlock(ingress_lock) spin_lock(ingress_lock) list_add_tail(msg,ingress_msg) <- entry on list with no one left to free it. spin_unlock(ingress_lock) To fix we only enqueue from backlog if the ENABLED bit is set. The tear down logic clears the bit with ingress_lock set so we wont enqueue the msg in the last step. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-4-john.fastabend@gmail.com --- include/linux/skmsg.h | 54 +++++++++++++++++++++++++++++++++------------------ net/core/skmsg.c | 6 ------ 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 96f319099744..14ab0c0bc924 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline void sock_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + kfree_skb(skb); +} + +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) +{ + if (msg->skb) + sock_drop(psock->sk, msg->skb); + kfree(msg); +} + static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); - list_add_tail(&msg->list, &psock->ingress_msg); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + list_add_tail(&msg->list, &psock->ingress_msg); + else + drop_sk_msg(psock, msg); spin_unlock_bh(&psock->ingress_lock); } @@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk, psock->psock_update_sk_prot(sk, psock, true); } -static inline void sk_psock_set_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - set_bit(bit, &psock->state); -} - -static inline void sk_psock_clear_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - clear_bit(bit, &psock->state); -} - -static inline bool sk_psock_test_state(const struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - return test_bit(bit, &psock->state); -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 036cdb33a94a..2d6249b28928 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -584,12 +584,6 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } -static void sock_drop(struct sock *sk, struct sk_buff *skb) -{ - sk_drops_add(sk, skb); - kfree_skb(skb); -} - static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, struct sk_buff *skb, -- cgit v1.2.3 From 8b54874ef1617185048029a3083d510569e93751 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 26 Jul 2021 09:20:14 +0300 Subject: net/mlx5: Fix flow table chaining Fix a bug when flow table is created in priority that already has other flow tables as shown in the below diagram. If the new flow table (FT-B) has the lowest level in the priority, we need to connect the flow tables from the previous priority (p0) to this new table. In addition when this flow table is destroyed (FT-B), we need to connect the flow tables from the previous priority (p0) to the next level flow table (FT-C) in the same priority of the destroyed table (if exists). --------- |root_ns| --------- | -------------------------------- | | | ---------- ---------- --------- |p(prio)-x| | p-y | | p-n | ---------- ---------- --------- | | ---------------- ------------------ |ns(e.g bypass)| |ns(e.g. kernel) | ---------------- ------------------ | | | ------- ------ ---- | p0 | | p1 | |p2| ------- ------ ---- | | \ -------- ------- ------ | FT-A | |FT-B | |FT-C| -------- ------- ------ Fixes: f90edfd279f3 ("net/mlx5_core: Connect flow tables") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d7bf0a3e4a52..c0697e1b7118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { - struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *next_ft, *first_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ - if (list_empty(&prio->node.children)) { + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { err = connect_prev_fts(dev, ft, prio); if (err) return err; - next_ft = find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(prio); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) node.list) == ft)) return 0; - next_ft = find_next_chained_ft(prio); + next_ft = find_next_ft(ft); err = connect_fwd_rules(dev, next_ft, ft); if (err) return err; -- cgit v1.2.3 From 90b22b9bcd242a3ba238f2c6f7eab771799001f8 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 8 Jul 2021 15:24:58 +0300 Subject: net/mlx5e: Disable Rx ntuple offload for uplink representor Rx ntuple offload is not supported in switchdev mode. Tryng to enable it cause kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 80000001065a5067 P4D 80000001065a5067 PUD 106594067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 1089 Comm: ethtool Not tainted 5.13.0-rc7_for_upstream_min_debug_2021_06_23_16_44 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_arfs_enable+0x70/0xd0 [mlx5_core] Code: 44 24 10 00 00 00 00 48 c7 44 24 18 00 00 00 00 49 63 c4 48 89 e2 44 89 e6 48 69 c0 20 08 00 00 48 89 ef 48 03 85 68 ac 00 00 <48> 8b 40 08 48 89 44 24 08 e8 d2 aa fd ff 48 83 05 82 96 18 00 01 RSP: 0018:ffff8881047679e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000004000000000 RCX: 0000004000000000 RDX: ffff8881047679e0 RSI: 0000000000000000 RDI: ffff888115100880 RBP: ffff888115100880 R08: ffffffffa00f6cb0 R09: ffff888104767a18 R10: ffff8881151000a0 R11: ffff888109479540 R12: 0000000000000000 R13: ffff888104767bb8 R14: ffff888115100000 R15: ffff8881151000a0 FS: 00007f41a64ab740(0000) GS:ffff8882f5dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104cbc005 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: set_feature_arfs+0x1e/0x40 [mlx5_core] mlx5e_handle_feature+0x43/0xa0 [mlx5_core] mlx5e_set_features+0x139/0x1b0 [mlx5_core] __netdev_update_features+0x2b3/0xaf0 ethnl_set_features+0x176/0x3a0 ? __nla_parse+0x22/0x30 genl_family_rcv_msg_doit+0xe2/0x140 genl_rcv_msg+0xde/0x1d0 ? features_reply_size+0xe0/0xe0 ? genl_get_cmd+0xd0/0xd0 netlink_rcv_skb+0x4e/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2b0 netlink_sendmsg+0x225/0x450 sock_sendmsg+0x33/0x40 __sys_sendto+0xd4/0x120 ? __sys_recvmsg+0x4e/0x90 ? exc_page_fault+0x219/0x740 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f41a65b0cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffd8d688358 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000010f42a0 RCX: 00007f41a65b0cba RDX: 0000000000000058 RSI: 00000000010f43b0 RDI: 0000000000000003 RBP: 000000000047ae60 R08: 00007f41a667c000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 00000000010f4340 R13: 00000000010f4350 R14: 00007ffd8d688400 R15: 00000000010f42a0 Modules linked in: mlx5_vdpa vhost_iotlb vdpa xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core ptp pps_core fuse CR2: 0000000000000008 ---[ end trace c66523f2aba94b43 ]--- Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 29 ++++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09e65557e75..c6f99fc77411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3829,6 +3829,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return 0; } +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + return features; +} + static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { @@ -3860,15 +3878,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } - if (mlx5e_is_uplink_rep(priv)) { - features &= ~NETIF_F_HW_TLS_RX; - if (netdev->features & NETIF_F_HW_TLS_RX) - netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); - - features &= ~NETIF_F_HW_TLS_TX; - if (netdev->features & NETIF_F_HW_TLS_TX) - netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); - } + if (mlx5e_is_uplink_rep(priv)) + features = mlx5e_fix_uplink_rep_features(netdev, features); mutex_unlock(&priv->state_lock); -- cgit v1.2.3 From c671972534c6f7fce789ac8156a2bc3bd146f806 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 22 Jun 2021 17:07:02 +0300 Subject: net/mlx5: E-Switch, Set destination vport vhca id only when merged eswitch is supported Destination vport vhca id is valid flag is set only merged eswitch isn't supported. Change destination vport vhca id value to be set also only when merged eswitch is supported. Fixes: e4ad91f23f10 ("net/mlx5e: Split offloaded eswitch TC rules for port mirroring") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7579f3402776..b0a2ca9037ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; - dest[dest_idx].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; -- cgit v1.2.3 From dd3fddb82780bfa24124834edd90bbc63bd689cc Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Jun 2021 14:17:07 +0300 Subject: net/mlx5: E-Switch, handle devcom events only for ports on the same device This is the same check as LAG mode checks if to enable lag. This will fix adding peer miss rules if lag is not supported and even an incorrect rules in socket direct mode. Also fix the incorrect comment on mlx5_get_next_phys_dev() as flow #1 doesn't exists. Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 5 +---- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ceebfc20f65e..def2156e50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* This function is called with two flows: - * 1. During initialization of mlx5_core_dev and we don't need to lock it. - * 2. During LAG configure stage and caller holds &mlx5_intf_mutex. - */ +/* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { struct auxiliary_device *adev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0a2ca9037ac..011e766e4f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2368,6 +2368,9 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) + break; + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; -- cgit v1.2.3 From e2351e517068718724f1d3b4010e2a41ec91fa76 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Jun 2021 13:45:05 +0300 Subject: net/mlx5e: RX, Avoid possible data corruption when relaxed ordering and LRO combined When HW aggregates packets for an LRO session, it writes the payload of two consecutive packets of a flow contiguously, so that they usually share a cacheline. The first byte of a packet's payload is written immediately after the last byte of the preceding packet. In this flow, there are two consecutive write requests to the shared cacheline: 1. Regular write for the earlier packet. 2. Read-modify-write for the following packet. In case of relaxed-ordering on, these two writes might be re-ordered. Using the end padding optimization (to avoid partial write for the last cacheline of a packet) becomes problematic if the two writes occur out-of-order, as the padding would overwrite payload that belongs to the following packet, causing data corruption. Avoid this by disabling the end padding optimization when both LRO and relaxed-ordering are enabled. Fixes: 17347d5430c4 ("net/mlx5e: Add support for PCI relaxed ordering") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 150c8e82c738..2cbf18c967f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -471,6 +471,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && params->lro_en ? + MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -508,7 +517,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); -- cgit v1.2.3 From 9841d58f3550d11c6181424427e8ad8c9c80f1b6 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 30 Jun 2021 13:33:31 +0300 Subject: net/mlx5e: Add NETIF_F_HW_TC to hw_features when HTB offload is available If a feature flag is only present in features, but not in hw_features, the user can't reset it. Although hw_features may contain NETIF_F_HW_TC by the point where the driver checks whether HTB offload is supported, this flag is controlled by another condition that may not hold. Set it explicitly to make sure the user can disable it. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c6f99fc77411..c5a2e3e6fe4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4870,6 +4870,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_ETH(mdev, scatter_fcs)) netdev->hw_features |= NETIF_F_RXFCS; + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->features = netdev->hw_features; /* Defaults */ @@ -4890,8 +4893,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } - if (mlx5_qos_is_supported(mdev)) - netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; -- cgit v1.2.3 From a759f845d1f78634b54744db0fa48524ef6d0e14 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 30 Jun 2021 11:17:12 +0300 Subject: net/mlx5e: Consider PTP-RQ when setting RX VLAN stripping Add PTP-RQ to the loop when setting rx-vlan-offload feature via ethtool. On PTP-RQ's creation, set rx-vlan-offload into its parameters. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 778e229310a9..07b429b94d93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->log_sq_size = orig->log_sq_size; mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } - if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } } static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c5a2e3e6fe4b..37c440837945 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3384,7 +3384,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { - int err = 0; + int err; int i; for (i = 0; i < chs->num; i++) { @@ -3392,6 +3392,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) if (err) return err; } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); return 0; } -- cgit v1.2.3 From 497008e783452a2ec45c7ec5835cfe6950dcb097 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 21 Jun 2021 18:04:07 +0300 Subject: net/mlx5e: Fix page allocation failure for trap-RQ over SF Set the correct device pointer to the trap-RQ, to allow access to dma_mask and avoid allocation request with the wrong pci-dev. WARNING: CPU: 1 PID: 12005 at kernel/dma/mapping.c:151 dma_map_page_attrs+0x139/0x1c0 ... all Trace: ? __page_pool_alloc_pages_slow+0x5a/0x210 mlx5e_post_rx_wqes+0x258/0x400 [mlx5_core] mlx5e_trap_napi_poll+0x44/0xc0 [mlx5_core] __napi_poll+0x24/0x150 net_rx_action+0x22b/0x280 __do_softirq+0xc7/0x27e do_softirq+0x61/0x80 __local_bh_enable_ip+0x4b/0x50 mlx5e_handle_action_trap+0x2dd/0x4d0 [mlx5_core] blocking_notifier_call_chain+0x5a/0x80 mlx5_devlink_trap_action_set+0x8b/0x100 [mlx5_core] Fixes: 5543e989fe5e ("net/mlx5e: Add trap entity to ETH driver") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 86ab4e864fe6..7f94508594fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params struct mlx5e_priv *priv = t->priv; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; -- cgit v1.2.3 From 678b1ae1af4aef488fcc42baa663e737b9a531ba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jun 2021 10:24:17 +0300 Subject: net/mlx5e: Fix page allocation failure for ptp-RQ over SF Set the correct pci-device pointer to the ptp-RQ. This allows access to dma_mask and avoids allocation request with wrong pci-device. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 07b429b94d93..efef4adce086 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -497,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, int err; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; -- cgit v1.2.3 From 7f331bf0f060c2727e36d64f9b098b4ee5f3dfad Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 16 Jun 2021 19:11:03 +0300 Subject: net/mlx5: Unload device upon firmware fatal error When fw_fatal reporter reports an error, the firmware in not responding. Unload the device to ensure that the driver closes all its resources, even if recovery is not due (user disabled auto-recovery or reporter is in grace period). On successful recovery the device is loaded back up. Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9ff163c5bcde..9abeb80ffa31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } fw_reporter_ctx.err_synd = health->synd; fw_reporter_ctx.miss_counter = health->miss_counter; - devlink_health_report(health->fw_fatal_reporter, - "FW fatal error reported", &fw_reporter_ctx); + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev); + } } static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { -- cgit v1.2.3 From b1c2f6312c5005c928a72e668bf305a589d828d4 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev() The result of __dev_get_by_index() is not checked for NULL and then gets dereferenced immediately. Also, __dev_get_by_index() must be called while holding either RTNL lock or @dev_base_lock, which isn't satisfied by mlx5e_hairpin_get_mdev() or its callers. This makes the underlying hlist_for_each_entry() loop not safe, and can have adverse effects in itself. Fix by using dev_get_by_index() and handling nullptr return value when ifindex device is not found. Update mlx5e_hairpin_get_mdev() callers to check for possible PTR_ERR() result. Fixes: 77ab67b7f0f9 ("net/mlx5e: Basic setup of hairpin object") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 629a61e8022f..d273758255c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -452,12 +452,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) { + struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_priv *priv; - netdev = __dev_get_by_index(net, ifindex); + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + priv = netdev_priv(netdev); - return priv->mdev; + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. + */ + return mdev; } static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) @@ -666,6 +686,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params func_mdev = priv->mdev; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); if (IS_ERR(pair)) { @@ -804,6 +828,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, int err; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; -- cgit v1.2.3 From 740452e09cf5fc489ce60831cf11abef117b5d26 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 26 Apr 2021 11:06:37 +0800 Subject: net/mlx5: Fix mlx5_vport_tbl_attr chain from u16 to u32 The offending refactor commit uses u16 chain wrongly. Actually, it should be u32. Fixes: c620b772152b ("net/mlx5: Refactor tc flow attributes structure") CC: Ariel Levkovich Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48cac5bf606d..d562edf5b0bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -636,7 +636,7 @@ struct esw_vport_tbl_namespace { }; struct mlx5_vport_tbl_attr { - u16 chain; + u32 chain; u16 prio; u16 vport; const struct esw_vport_tbl_namespace *vport_ns; -- cgit v1.2.3 From 46573e3ab08fb041d5ba7bf7bf3215a1e724c78c Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Wed, 28 Jul 2021 09:49:25 +0800 Subject: nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Reviewed-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 1340fab9565e..e3e72b8a29e3 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); + "Cannot allocate shash (code=%pe)\n", tfm); goto out; } -- cgit v1.2.3 From 557fb5862c9272ad9b21407afe1da8acfd9b53eb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 27 Jul 2021 23:40:54 -0300 Subject: sctp: fix return value check in __sctp_rcv_asconf_lookup As Ben Hutchings noticed, this check should have been inverted: the call returns true in case of success. Reported-by: Ben Hutchings Fixes: 0c5dc070ff3d ("sctp: validate from_addr_param return") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index eb3c2a34a31c..5ef86fdb1176 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (unlikely(!af)) return NULL; - if (af->from_addr_param(&paddr, param, peer_port, 0)) + if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp); -- cgit v1.2.3 From 76a16be07b209a3f507c72abe823bd3af1c8661a Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 15:43:13 +0800 Subject: tulip: windbond-840: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/winbond-840.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index f6ff1f76eacb..1876f15dd827 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; void __iomem *ioaddr; - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; pci_set_master(pdev); @@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); if (!ioaddr) - goto err_out_free_res; + goto err_out_netdev; for (i = 0; i < 3; i++) ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i)); @@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_cleardev: pci_iounmap(pdev, ioaddr); -err_out_free_res: - pci_release_regions(pdev); err_out_netdev: free_netdev (dev); return -ENODEV; @@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev) if (dev) { struct netdev_private *np = netdev_priv(dev); unregister_netdev(dev); - pci_release_regions(pdev); pci_iounmap(pdev, np->base_addr); free_netdev(dev); } -- cgit v1.2.3 From 5e7b30d24a5b8cb691c173b45b50e3ca0191be19 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jul 2021 08:49:09 +0200 Subject: nfc: nfcsim: fix use after free during module unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a use after free memory corruption during module exit: - nfcsim_exit() - nfcsim_device_free(dev0) - nfc_digital_unregister_device() This iterates over command queue and frees all commands, - dev->up = false - nfcsim_link_shutdown() - nfcsim_link_recv_wake() This wakes the sleeping thread nfcsim_link_recv_skb(). - nfcsim_link_recv_skb() Wake from wait_event_interruptible_timeout(), call directly the deb->cb callback even though (dev->up == false), - digital_send_cmd_complete() Dereference of "struct digital_cmd" cmd which was freed earlier by nfc_digital_unregister_device(). This causes memory corruption shortly after (with unrelated stack trace): nfc nfc0: NFC: nfcsim_recv_wq: Device is down llcp: nfc_llcp_recv: err -19 nfc nfc1: NFC: nfcsim_recv_wq: Device is down BUG: unable to handle page fault for address: ffffffffffffffed Call Trace: fsnotify+0x54b/0x5c0 __fsnotify_parent+0x1fe/0x300 ? vfs_write+0x27c/0x390 vfs_write+0x27c/0x390 ksys_write+0x63/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae KASAN report: BUG: KASAN: use-after-free in digital_send_cmd_complete+0x16/0x50 Write of size 8 at addr ffff88800a05f720 by task kworker/0:2/71 Workqueue: events nfcsim_recv_wq [nfcsim] Call Trace:  dump_stack_lvl+0x45/0x59  print_address_description.constprop.0+0x21/0x140  ? digital_send_cmd_complete+0x16/0x50  ? digital_send_cmd_complete+0x16/0x50  kasan_report.cold+0x7f/0x11b  ? digital_send_cmd_complete+0x16/0x50  ? digital_dep_link_down+0x60/0x60  digital_send_cmd_complete+0x16/0x50  nfcsim_recv_wq+0x38f/0x3d5 [nfcsim]  ? nfcsim_in_send_cmd+0x4a/0x4a [nfcsim]  ? lock_is_held_type+0x98/0x110  ? finish_wait+0x110/0x110  ? rcu_read_lock_sched_held+0x9c/0xd0  ? rcu_read_lock_bh_held+0xb0/0xb0  ? lockdep_hardirqs_on_prepare+0x12e/0x1f0 This flow of calling digital_send_cmd_complete() callback on driver exit is specific to nfcsim which implements reading and sending work queues. Since the NFC digital device was unregistered, the callback should not be called. Fixes: 204bddcb508f ("NFC: nfcsim: Make use of the Digital layer") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/nfcsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index a9864fcdfba6..dd27c85190d3 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work) if (!IS_ERR(skb)) dev_kfree_skb(skb); - - skb = ERR_PTR(-ENODEV); + return; } dev->cb(dev->nfc_digital_dev, dev->arg, skb); -- cgit v1.2.3 From 1e60cebf82948cfdc9497ea4553bab125587593c Mon Sep 17 00:00:00 2001 From: zhang kai Date: Wed, 28 Jul 2021 18:54:18 +0800 Subject: net: let flow have same hash in two directions using same source and destination ip/port for flow hash calculation within the two directions. Signed-off-by: zhang kai Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2aadbfc5193b..4b2415d34873 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) } EXPORT_SYMBOL(flow_get_u32_dst); -/* Sort the source and destination IP (and the ports if the IP are the same), +/* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) @@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) case FLOW_DISSECTOR_KEY_IPV4_ADDRS: addr_diff = (__force u32)keys->addrs.v4addrs.dst - (__force u32)keys->addrs.v4addrs.src; - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; @@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); + } + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; -- cgit v1.2.3 From 89fb62fde3b226f99b7015280cf132e2a7438edf Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 20:11:07 +0800 Subject: sis900: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index ca9c00b7f588..cff87de9178a 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev, #endif /* setup various bits in PCI command register */ - ret = pci_enable_device(pci_dev); + ret = pcim_enable_device(pci_dev); if(ret) return ret; i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32)); @@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev, ioaddr = pci_iomap(pci_dev, 0, 0); if (!ioaddr) { ret = -ENOMEM; - goto err_out_cleardev; + goto err_out; } sis_priv = netdev_priv(net_dev); @@ -581,8 +581,6 @@ err_unmap_tx: sis_priv->tx_ring_dma); err_out_unmap: pci_iounmap(pci_dev, ioaddr); -err_out_cleardev: - pci_release_regions(pci_dev); err_out: free_netdev(net_dev); return ret; @@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev) sis_priv->tx_ring_dma); pci_iounmap(pci_dev, sis_priv->ioaddr); free_netdev(net_dev); - pci_release_regions(pci_dev); } static int __maybe_unused sis900_suspend(struct device *dev) -- cgit v1.2.3 From f5e81d1117501546b7be050c5fbafa6efd2c722c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: bpf: Introduce BPF nospec instruction for mitigating Spectre v4 In case of JITs, each of the JIT backends compiles the BPF nospec instruction /either/ to a machine instruction which emits a speculation barrier /or/ to /no/ machine instruction in case the underlying architecture is not affected by Speculative Store Bypass or has different mitigations in place already. This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence' instruction for mitigation. In case of arm64, we rely on the firmware mitigation as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled, it works for all of the kernel code with no need to provide any additional instructions here (hence only comment in arm64 JIT). Other archs can follow as needed. The BPF nospec instruction is specifically targeting Spectre v4 since i) we don't use a serialization barrier for the Spectre v1 case, and ii) mitigation instructions for v1 and v4 might be different on some archs. The BPF nospec is required for a future commit, where the BPF verifier does annotate intermediate BPF programs with speculation barriers. Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 3 +++ arch/arm64/net/bpf_jit_comp.c | 13 +++++++++++++ arch/mips/net/ebpf_jit.c | 3 +++ arch/powerpc/net/bpf_jit_comp32.c | 6 ++++++ arch/powerpc/net/bpf_jit_comp64.c | 6 ++++++ arch/riscv/net/bpf_jit_comp32.c | 4 ++++ arch/riscv/net/bpf_jit_comp64.c | 4 ++++ arch/s390/net/bpf_jit_comp.c | 5 +++++ arch/sparc/net/bpf_jit_comp_64.c | 3 +++ arch/x86/net/bpf_jit_comp.c | 7 +++++++ arch/x86/net/bpf_jit_comp32.c | 6 ++++++ include/linux/filter.h | 15 +++++++++++++++ kernel/bpf/core.c | 19 ++++++++++++++++++- kernel/bpf/disasm.c | 16 +++++++++------- 14 files changed, 102 insertions(+), 8 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 897634d0a67c..a951276f0547 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1602,6 +1602,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 939dd06764bc..3a73e9375712 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ jeq_common: } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 34bb1583fc0c..beb12cbc8c29 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index de8595880fee..b87a63dba9c8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -627,6 +627,12 @@ emit_clear: } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 81de865f4c7c..e6497424cbf6 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 87e3bf5b9086..3af4131c22c7 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -939,6 +939,10 @@ out_be: emit_ld(rd, 0, RV_REG_T1, ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2ae419f5115a..88419263a89a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; } break; + /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; /* * BPF_ST(X) */ diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 4b8d3c65d266..9a2f20cbd48b 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4b951458c9fc..16d76f814e9b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3da88ded6ee3..3bfda5f502cb 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/include/linux/filter.h b/include/linux/filter.h index 472f97074da0..83b896044e79 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -73,6 +73,11 @@ struct ctl_table_header; /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 +/* unused opcode to mark speculation barrier for mitigating + * Speculative Store Bypass + */ +#define BPF_NOSPEC 0xc0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = 0 }) +/* Speculation barrier */ + +#define BPF_ST_NOSPEC() \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_NOSPEC, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b1577498373..b1a5fc04492b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,8 @@ #include #include #include + +#include #include /* Registers */ @@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, @@ -1621,7 +1624,21 @@ out: COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index bbfc6bb79240..ca3cd9aaa6ce 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_%02x\n", insn->code); } } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { + if (BPF_MODE(insn->code) == BPF_MEM) { + verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) { + verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + } else { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); - return; } - verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); -- cgit v1.2.3 From 2039f26f3aca5b0e419b98f65dd36481337b86ee Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: bpf: Fix leakage due to insufficient speculative store bypass mitigation Spectre v4 gadgets make use of memory disambiguation, which is a set of techniques that execute memory access instructions, that is, loads and stores, out of program order; Intel's optimization manual, section 2.4.4.5: A load instruction micro-op may depend on a preceding store. Many microarchitectures block loads until all preceding store addresses are known. The memory disambiguator predicts which loads will not depend on any previous stores. When the disambiguator predicts that a load does not have such a dependency, the load takes its data from the L1 data cache. Eventually, the prediction is verified. If an actual conflict is detected, the load and all succeeding instructions are re-executed. af86ca4e3088 ("bpf: Prevent memory disambiguation attack") tried to mitigate this attack by sanitizing the memory locations through preemptive "fast" (low latency) stores of zero prior to the actual "slow" (high latency) store of a pointer value such that upon dependency misprediction the CPU then speculatively executes the load of the pointer value and retrieves the zero value instead of the attacker controlled scalar value previously stored at that location, meaning, subsequent access in the speculative domain is then redirected to the "zero page". The sanitized preemptive store of zero prior to the actual "slow" store is done through a simple ST instruction based on r10 (frame pointer) with relative offset to the stack location that the verifier has been tracking on the original used register for STX, which does not have to be r10. Thus, there are no memory dependencies for this store, since it's only using r10 and immediate constant of zero; hence af86ca4e3088 /assumed/ a low latency operation. However, a recent attack demonstrated that this mitigation is not sufficient since the preemptive store of zero could also be turned into a "slow" store and is thus bypassed as well: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value 31: (7b) *(u64 *)(r10 -16) = r2 // r9 will remain "fast" register, r10 will become "slow" register below 32: (bf) r9 = r10 // JIT maps BPF reg to x86 reg: // r9 -> r15 (callee saved) // r10 -> rbp // train store forward prediction to break dependency link between both r9 // and r10 by evicting them from the predictor's LRU table. 33: (61) r0 = *(u32 *)(r7 +24576) 34: (63) *(u32 *)(r7 +29696) = r0 35: (61) r0 = *(u32 *)(r7 +24580) 36: (63) *(u32 *)(r7 +29700) = r0 37: (61) r0 = *(u32 *)(r7 +24584) 38: (63) *(u32 *)(r7 +29704) = r0 39: (61) r0 = *(u32 *)(r7 +24588) 40: (63) *(u32 *)(r7 +29708) = r0 [...] 543: (61) r0 = *(u32 *)(r7 +25596) 544: (63) *(u32 *)(r7 +30716) = r0 // prepare call to bpf_ringbuf_output() helper. the latter will cause rbp // to spill to stack memory while r13/r14/r15 (all callee saved regs) remain // in hardware registers. rbp becomes slow due to push/pop latency. below is // disasm of bpf_ringbuf_output() helper for better visual context: // // ffffffff8117ee20: 41 54 push r12 // ffffffff8117ee22: 55 push rbp // ffffffff8117ee23: 53 push rbx // ffffffff8117ee24: 48 f7 c1 fc ff ff ff test rcx,0xfffffffffffffffc // ffffffff8117ee2b: 0f 85 af 00 00 00 jne ffffffff8117eee0 <-- jump taken // [...] // ffffffff8117eee0: 49 c7 c4 ea ff ff ff mov r12,0xffffffffffffffea // ffffffff8117eee7: 5b pop rbx // ffffffff8117eee8: 5d pop rbp // ffffffff8117eee9: 4c 89 e0 mov rax,r12 // ffffffff8117eeec: 41 5c pop r12 // ffffffff8117eeee: c3 ret 545: (18) r1 = map[id:4] 547: (bf) r2 = r7 548: (b7) r3 = 0 549: (b7) r4 = 4 550: (85) call bpf_ringbuf_output#194288 // instruction 551 inserted by verifier \ 551: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here // storing map value pointer r7 at fp-16 | since value of r10 is "slow". 552: (7b) *(u64 *)(r10 -16) = r7 / // following "fast" read to the same memory location, but due to dependency // misprediction it will speculatively execute before insn 551/552 completes. 553: (79) r2 = *(u64 *)(r9 -16) // in speculative domain contains attacker controlled r2. in non-speculative // domain this contains r7, and thus accesses r7 +0 below. 554: (71) r3 = *(u8 *)(r2 +0) // leak r3 As can be seen, the current speculative store bypass mitigation which the verifier inserts at line 551 is insufficient since /both/, the write of the zero sanitation as well as the map value pointer are a high latency instruction due to prior memory access via push/pop of r10 (rbp) in contrast to the low latency read in line 553 as r9 (r15) which stays in hardware registers. Thus, architecturally, fp-16 is r7, however, microarchitecturally, fp-16 can still be r2. Initial thoughts to address this issue was to track spilled pointer loads from stack and enforce their load via LDX through r10 as well so that /both/ the preemptive store of zero /as well as/ the load use the /same/ register such that a dependency is created between the store and load. However, this option is not sufficient either since it can be bypassed as well under speculation. An updated attack with pointer spill/fills now _all_ based on r10 would look as follows: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value [...] // longer store forward prediction training sequence than before. 2062: (61) r0 = *(u32 *)(r7 +25588) 2063: (63) *(u32 *)(r7 +30708) = r0 2064: (61) r0 = *(u32 *)(r7 +25592) 2065: (63) *(u32 *)(r7 +30712) = r0 2066: (61) r0 = *(u32 *)(r7 +25596) 2067: (63) *(u32 *)(r7 +30716) = r0 // store the speculative load address (scalar) this time after the store // forward prediction training. 2068: (7b) *(u64 *)(r10 -16) = r2 // preoccupy the CPU store port by running sequence of dummy stores. 2069: (63) *(u32 *)(r7 +29696) = r0 2070: (63) *(u32 *)(r7 +29700) = r0 2071: (63) *(u32 *)(r7 +29704) = r0 2072: (63) *(u32 *)(r7 +29708) = r0 2073: (63) *(u32 *)(r7 +29712) = r0 2074: (63) *(u32 *)(r7 +29716) = r0 2075: (63) *(u32 *)(r7 +29720) = r0 2076: (63) *(u32 *)(r7 +29724) = r0 2077: (63) *(u32 *)(r7 +29728) = r0 2078: (63) *(u32 *)(r7 +29732) = r0 2079: (63) *(u32 *)(r7 +29736) = r0 2080: (63) *(u32 *)(r7 +29740) = r0 2081: (63) *(u32 *)(r7 +29744) = r0 2082: (63) *(u32 *)(r7 +29748) = r0 2083: (63) *(u32 *)(r7 +29752) = r0 2084: (63) *(u32 *)(r7 +29756) = r0 2085: (63) *(u32 *)(r7 +29760) = r0 2086: (63) *(u32 *)(r7 +29764) = r0 2087: (63) *(u32 *)(r7 +29768) = r0 2088: (63) *(u32 *)(r7 +29772) = r0 2089: (63) *(u32 *)(r7 +29776) = r0 2090: (63) *(u32 *)(r7 +29780) = r0 2091: (63) *(u32 *)(r7 +29784) = r0 2092: (63) *(u32 *)(r7 +29788) = r0 2093: (63) *(u32 *)(r7 +29792) = r0 2094: (63) *(u32 *)(r7 +29796) = r0 2095: (63) *(u32 *)(r7 +29800) = r0 2096: (63) *(u32 *)(r7 +29804) = r0 2097: (63) *(u32 *)(r7 +29808) = r0 2098: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; same as before, also including the // sanitation store with 0 from the current mitigation by the verifier. 2099: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here 2100: (7b) *(u64 *)(r10 -16) = r7 | since store unit is still busy. // load from stack intended to bypass stores. 2101: (79) r2 = *(u64 *)(r10 -16) 2102: (71) r3 = *(u8 *)(r2 +0) // leak r3 [...] Looking at the CPU microarchitecture, the scheduler might issue loads (such as seen in line 2101) before stores (line 2099,2100) because the load execution units become available while the store execution unit is still busy with the sequence of dummy stores (line 2069-2098). And so the load may use the prior stored scalar from r2 at address r10 -16 for speculation. The updated attack may work less reliable on CPU microarchitectures where loads and stores share execution resources. This concludes that the sanitizing with zero stores from af86ca4e3088 ("bpf: Prevent memory disambiguation attack") is insufficient. Moreover, the detection of stack reuse from af86ca4e3088 where previously data (STACK_MISC) has been written to a given stack slot where a pointer value is now to be stored does not have sufficient coverage as precondition for the mitigation either; for several reasons outlined as follows: 1) Stack content from prior program runs could still be preserved and is therefore not "random", best example is to split a speculative store bypass attack between tail calls, program A would prepare and store the oob address at a given stack slot and then tail call into program B which does the "slow" store of a pointer to the stack with subsequent "fast" read. From program B PoV such stack slot type is STACK_INVALID, and therefore also must be subject to mitigation. 2) The STACK_SPILL must not be coupled to register_is_const(&stack->spilled_ptr) condition, for example, the previous content of that memory location could also be a pointer to map or map value. Without the fix, a speculative store bypass is not mitigated in such precondition and can then lead to a type confusion in the speculative domain leaking kernel memory near these pointer types. While brainstorming on various alternative mitigation possibilities, we also stumbled upon a retrospective from Chrome developers [0]: [...] For variant 4, we implemented a mitigation to zero the unused memory of the heap prior to allocation, which cost about 1% when done concurrently and 4% for scavenging. Variant 4 defeats everything we could think of. We explored more mitigations for variant 4 but the threat proved to be more pervasive and dangerous than we anticipated. For example, stack slots used by the register allocator in the optimizing compiler could be subject to type confusion, leading to pointer crafting. Mitigating type confusion for stack slots alone would have required a complete redesign of the backend of the optimizing compiler, perhaps man years of work, without a guarantee of completeness. [...] From BPF side, the problem space is reduced, however, options are rather limited. One idea that has been explored was to xor-obfuscate pointer spills to the BPF stack: [...] // preoccupy the CPU store port by running sequence of dummy stores. [...] 2106: (63) *(u32 *)(r7 +29796) = r0 2107: (63) *(u32 *)(r7 +29800) = r0 2108: (63) *(u32 *)(r7 +29804) = r0 2109: (63) *(u32 *)(r7 +29808) = r0 2110: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; xored with random 'secret' value // of 943576462 before store ... 2111: (b4) w11 = 943576462 2112: (af) r11 ^= r7 2113: (7b) *(u64 *)(r10 -16) = r11 2114: (79) r11 = *(u64 *)(r10 -16) 2115: (b4) w2 = 943576462 2116: (af) r2 ^= r11 // ... and restored with the same 'secret' value with the help of AX reg. 2117: (71) r3 = *(u8 *)(r2 +0) [...] While the above would not prevent speculation, it would make data leakage infeasible by directing it to random locations. In order to be effective and prevent type confusion under speculation, such random secret would have to be regenerated for each store. The additional complexity involved for a tracking mechanism that prevents jumps such that restoring spilled pointers would not get corrupted is not worth the gain for unprivileged. Hence, the fix in here eventually opted for emitting a non-public BPF_ST | BPF_NOSPEC instruction which the x86 JIT translates into a lfence opcode. Inserting the latter in between the store and load instruction is one of the mitigations options [1]. The x86 instruction manual notes: [...] An LFENCE that follows an instruction that stores to memory might complete before the data being stored have become globally visible. [...] The latter meaning that the preceding store instruction finished execution and the store is at minimum guaranteed to be in the CPU's store queue, but it's not guaranteed to be in that CPU's L1 cache at that point (globally visible). The latter would only be guaranteed via sfence. So the load which is guaranteed to execute after the lfence for that local CPU would have to rely on store-to-load forwarding. [2], in section 2.3 on store buffers says: [...] For every store operation that is added to the ROB, an entry is allocated in the store buffer. This entry requires both the virtual and physical address of the target. Only if there is no free entry in the store buffer, the frontend stalls until there is an empty slot available in the store buffer again. Otherwise, the CPU can immediately continue adding subsequent instructions to the ROB and execute them out of order. On Intel CPUs, the store buffer has up to 56 entries. [...] One small upside on the fix is that it lifts constraints from af86ca4e3088 where the sanitize_stack_off relative to r10 must be the same when coming from different paths. The BPF_ST | BPF_NOSPEC gets emitted after a BPF_STX or BPF_ST instruction. This happens either when we store a pointer or data value to the BPF stack for the first time, or upon later pointer spills. The former needs to be enforced since otherwise stale stack data could be leaked under speculation as outlined earlier. For non-x86 JITs the BPF_ST | BPF_NOSPEC mapping is currently optimized away, but others could emit a speculation barrier as well if necessary. For real-world unprivileged programs e.g. generated by LLVM, pointer spill/fill is only generated upon register pressure and LLVM only tries to do that for pointers which are not used often. The program main impact will be the initial BPF_ST | BPF_NOSPEC sanitation for the STACK_INVALID case when the first write to a stack slot occurs e.g. upon map lookup. In future we might refine ways to mitigate the latter cost. [0] https://arxiv.org/pdf/1902.05178.pdf [1] https://msrc-blog.microsoft.com/2018/05/21/analysis-and-mitigation-of-speculative-store-bypass-cve-2018-3639/ [2] https://arxiv.org/pdf/1905.05725.pdf Fixes: af86ca4e3088 ("bpf: Prevent memory disambiguation attack") Fixes: f7cf25b2026d ("bpf: track spill/fill of constants") Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 87 ++++++++++++++++---------------------------- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7ba7e800d472..828d08afeee0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -340,8 +340,8 @@ struct bpf_insn_aux_data { }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int sanitize_stack_off; /* stack slot to be cleared */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 657062cb4d85..f9bda5476ea5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2610,6 +2610,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0) reg = &cur->regs[value_regno]; + if (!env->bypass_spec_v4) { + bool sanitize = reg && is_spillable_regtype(reg->type); + + for (i = 0; i < size; i++) { + if (state->stack[spi].slot_type[i] == STACK_INVALID) { + sanitize = true; + break; + } + } + + if (sanitize) + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { @@ -2632,47 +2645,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "invalid size of register spill\n"); return -EACCES; } - if (state != cur && reg->type == PTR_TO_STACK) { verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - - if (!env->bypass_spec_v4) { - bool sanitize = false; - - if (state->stack[spi].slot_type[0] == STACK_SPILL && - register_is_const(&state->stack[spi].spilled_ptr)) - sanitize = true; - for (i = 0; i < BPF_REG_SIZE; i++) - if (state->stack[spi].slot_type[i] == STACK_MISC) { - sanitize = true; - break; - } - if (sanitize) { - int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; - int soff = (-spi - 1) * BPF_REG_SIZE; - - /* detected reuse of integer stack slot with a pointer - * which means either llvm is reusing stack slot or - * an attacker is trying to exploit CVE-2018-3639 - * (speculative store bypass) - * Have to sanitize that slot with preemptive - * store of zero. - */ - if (*poff && *poff != soff) { - /* disallow programs where single insn stores - * into two different stack slots, since verifier - * cannot sanitize them - */ - verbose(env, - "insn %d cannot access two stack slots fp%d and fp%d", - insn_idx, *poff, soff); - return -EINVAL; - } - *poff = soff; - } - } save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; @@ -11913,35 +11889,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || - insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || - insn->code == (BPF_STX | BPF_MEM | BPF_H) || - insn->code == (BPF_STX | BPF_MEM | BPF_W) || - insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + ctx_access = true; + } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW) || + insn->code == (BPF_ST | BPF_MEM | BPF_B) || + insn->code == (BPF_ST | BPF_MEM | BPF_H) || + insn->code == (BPF_ST | BPF_MEM | BPF_W) || + insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - else + ctx_access = BPF_CLASS(insn->code) == BPF_STX; + } else { continue; + } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_off) { + env->insn_aux_data[i + delta].sanitize_stack_spill) { struct bpf_insn patch[] = { - /* Sanitize suspicious stack slot with zero. - * There are no memory dependencies for this store, - * since it's only using frame pointer and immediate - * constant of zero - */ - BPF_ST_MEM(BPF_DW, BPF_REG_FP, - env->insn_aux_data[i + delta].sanitize_stack_off, - 0), - /* the original STX instruction will immediately - * overwrite the same stack slot with appropriate value - */ *insn, + BPF_ST_NOSPEC(), }; cnt = ARRAY_SIZE(patch); @@ -11955,6 +11929,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } + if (!ctx_access) + continue; + switch (env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) -- cgit v1.2.3 From 8a7b46fa7902a3d36ce44a64f4d66586d66206ea Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 26 Jul 2021 11:26:44 +0200 Subject: MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver This patch adds Yasushi SHOJI as a reviewer for the Microchip CAN BUS Analyzer Tool driver. Link: https://lore.kernel.org/r/20210726111619.1023991-1-mkl@pengutronix.de Acked-by: Yasushi SHOJI Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..42ea3183e87c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11327,6 +11327,12 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* +MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +R: Yasushi SHOJI +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/mcba_usb.c + MCAN MMIO DEVICE DRIVER M: Chandrasekar Ramakrishnan L: linux-can@vger.kernel.org -- cgit v1.2.3 From f6b3c7848e66e9046c8a79a5b88fd03461cc252b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Jul 2021 17:12:46 +0300 Subject: can: hi311x: fix a signedness bug in hi3110_cmd() The hi3110_cmd() is supposed to return zero on success and negative error codes on failure, but it was accidentally declared as a u8 when it needs to be an int type. Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver") Link: https://lore.kernel.org/r/20210729141246.GA1267@kili Signed-off-by: Dan Carpenter Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index dd17b8c53e1c..89d9c986a229 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len) return ret; } -static u8 hi3110_cmd(struct spi_device *spi, u8 command) +static int hi3110_cmd(struct spi_device *spi, u8 command) { struct hi3110_priv *priv = spi_get_drvdata(spi); -- cgit v1.2.3 From fc43fb69a7af92839551f99c1a96a37b77b3ae7a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 13:36:30 +0300 Subject: can: mcba_usb_start(): add missing urb->transfer_dma initialization Yasushi reported, that his Microchip CAN Analyzer stopped working since commit 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb"). The problem was in missing urb->transfer_dma initialization. In my previous patch to this driver I refactored mcba_usb_start() code to avoid leaking usb coherent buffers. To archive it, I passed local stack variable to usb_alloc_coherent() and then saved it to private array to correctly free all coherent buffers on ->close() call. But I forgot to initialize urb->transfer_dma with variable passed to usb_alloc_coherent(). All of this was causing device to not work, since dma addr 0 is not valid and following log can be found on bug report page, which points exactly to problem described above. | DMAR: [DMA Write] Request device [00:14.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set Fixes: 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb") Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=990850 Link: https://lore.kernel.org/r/20210725103630.23864-1-paskripkin@gmail.com Cc: linux-stable Reported-by: Yasushi SHOJI Signed-off-by: Pavel Skripkin Tested-by: Yasushi SHOJI [mkl: fixed typos in commit message - thanks Yasushi SHOJI] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index a45865bd7254..a1a154c08b7f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), buf, MCBA_USB_RX_BUFF_SIZE, -- cgit v1.2.3 From 0e865f0c31928d6a313269ef624907eec55287c4 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 19:59:57 +0300 Subject: can: usb_8dev: fix memory leak In usb_8dev_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see usb_8dev_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Link: https://lore.kernel.org/r/d39b458cd425a1cf7f512f340224e6e9563b07bd.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index b6e7ef0d5bc6..d1b83bd1b3cb 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -137,7 +137,8 @@ struct usb_8dev_priv { u8 *cmd_msg_buffer; struct mutex usb_8dev_cmd_lock; - + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; /* tx frame */ @@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) } buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, USB_8DEV_ENDP_DATA_RX), @@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + priv->rxbuf[i] = buf; + priv->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv) usb_kill_anchored_urbs(&priv->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(priv->udev, RX_BUFFER_SIZE, + priv->rxbuf[i], priv->rxbuf_dma[i]); + usb_kill_anchored_urbs(&priv->tx_submitted); atomic_set(&priv->active_tx_urbs, 0); -- cgit v1.2.3 From 9969e3c5f40c166e3396acc36c34f9de502929f6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:33 +0300 Subject: can: ems_usb: fix memory leak In ems_usb_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see ems_usb_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Link: https://lore.kernel.org/r/59aa9fbc9a8cbf9af2bbd2f61a659c480b415800.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 0a37af4a3fa4..2b5302e72435 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -255,6 +255,8 @@ struct ems_usb { unsigned int free_slots; /* remember number of available slots */ struct ems_cpc_msg active_params; /* active controller parameters */ + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; static void ems_usb_read_interrupt_callback(struct urb *urb) @@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), buf, RX_BUFFER_SIZE, ems_usb_read_bulk_callback, dev); @@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev) break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev) usb_kill_anchored_urbs(&dev->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); -- cgit v1.2.3 From 928150fad41ba16df7fcc9f7f945747d0f56cbb6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:46 +0300 Subject: can: esd_usb2: fix memory leak In esd_usb2_setup_rx_urbs() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see esd_usb2_setup_rx_urbs) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Link: https://lore.kernel.org/r/b31b096926dcb35998ad0271aac4b51770ca7cc8.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 65b58f8fc328..66fa8b07c2e6 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -195,6 +195,8 @@ struct esd_usb2 { int net_count; u32 version; int rxinitdone; + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; struct esd_usb2_net_priv { @@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); @@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) goto freeurb; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, RX_BUFFER_SIZE, @@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) usb_unanchor_urb(urb); usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf, urb->transfer_dma); + goto freeurb; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); @@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev) int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); + + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { -- cgit v1.2.3 From 028a71775f811e9d60664ba2c248ff95c6cf57cb Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Thu, 29 Jul 2021 08:52:58 -0700 Subject: gve: Update MAINTAINERS list The team maintaining the gve driver has undergone some changes, this updates the MAINTAINERS file accordingly. Signed-off-by: Catherine Sullivan Signed-off-by: Jon Olson Signed-off-by: David Awogbemila Signed-off-by: Jeroen de Borst Link: https://lore.kernel.org/r/20210729155258.442650-1-csully@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..17a873153eba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7858,9 +7858,9 @@ S: Maintained F: drivers/input/touchscreen/goodix.c GOOGLE ETHERNET DRIVERS -M: Catherine Sullivan -R: Sagi Shahar -R: Jon Olson +M: Jeroen de Borst +R: Catherine Sullivan +R: David Awogbemila L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/google/gve.rst -- cgit v1.2.3