diff options
193 files changed, 1948 insertions, 1224 deletions
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 032b76f14f4f..9119f1caf391 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -21,6 +21,7 @@ Required properties: - "renesas,etheravb-r8a774a1" for the R8A774A1 SoC. - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC. - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC. + - "renesas,etheravb-r8a774e1" for the R8A774E1 SoC. - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A77960 SoC. - "renesas,etheravb-r8a77961" for the R8A77961 SoC. diff --git a/MAINTAINERS b/MAINTAINERS index ec6fa6e49933..9a54806ebf02 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8757,7 +8757,8 @@ F: include/drm/i915* F: include/uapi/drm/i915_drm.h INTEL ETHERNET DRIVERS -M: Jeff Kirsher <jeffrey.t.kirsher@intel.com> +M: Jesse Brandeburg <jesse.brandeburg@intel.com> +M: Tony Nguyen <anthony.l.nguyen@intel.com> L: intel-wired-lan@lists.osuosl.org (moderated for non-subscribers) S: Supported W: http://www.intel.com/support/feedback.htm @@ -12082,6 +12083,7 @@ NETWORKING [DSA] M: Andrew Lunn <andrew@lunn.ch> M: Vivien Didelot <vivien.didelot@gmail.com> M: Florian Fainelli <f.fainelli@gmail.com> +M: Vladimir Oltean <olteanv@gmail.com> S: Maintained F: Documentation/devicetree/bindings/net/dsa/ F: drivers/net/dsa/ @@ -18287,7 +18289,8 @@ F: drivers/gpu/vga/vga_switcheroo.c F: include/linux/vga_switcheroo.h VIA RHINE NETWORK DRIVER -S: Orphan +S: Maintained +M: Kevin Brace <kevinbrace@bracecomputerlab.com> F: drivers/net/ethernet/via/via-rhine.c VIA SD/MMC CARD CONTROLLER DRIVER @@ -18892,10 +18895,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm F: arch/x86/mm/ X86 PLATFORM DRIVERS -M: Darren Hart <dvhart@infradead.org> -M: Andy Shevchenko <andy@infradead.org> +M: Hans de Goede <hdegoede@redhat.com> +M: Mark Gross <mgross@linux.intel.com> L: platform-driver-x86@vger.kernel.org -S: Odd Fixes +S: Maintained T: git git://git.infradead.org/linux-platform-drivers-x86.git F: drivers/platform/olpc/ F: drivers/platform/x86/ diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index e93145d72c26..a6ab3689b2f4 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -150,7 +150,7 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - info.mfn = virt_to_gfn(vcpup); + info.mfn = percpu_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index b357164379f6..63a52ad9a75c 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -788,7 +788,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) 0: mov bskey, x21 mov rounds, x22 - br x7 + br x16 SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 @@ -806,7 +806,7 @@ SYM_FUNC_END(__xts_crypt8) uzp1 v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] -99: adr x7, \do8 +99: adr x16, \do8 bl __xts_crypt8 ldp q16, q17, [sp, #.Lframe_local_offset] diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 78d61f97371e..e809cb5a1631 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - fallthrough; cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) { diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index d6e18df9c53c..4b044e620d35 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -305,8 +305,6 @@ int ibm_partition(struct parsed_partitions *state) if (!disk->fops->getgeo) goto out_exit; fn = symbol_get(dasd_biodasdinfo); - if (!fn) - goto out_exit; blocksize = bdev_logical_block_size(bdev); if (blocksize <= 0) goto out_symbol; @@ -326,7 +324,7 @@ int ibm_partition(struct parsed_partitions *state) geo->start = get_start_sect(bdev); if (disk->fops->getgeo(bdev, geo)) goto out_freeall; - if (fn(disk, info)) { + if (!fn || fn(disk, info)) { kfree(info); info = NULL; } @@ -370,7 +368,8 @@ out_nolab: out_nogeo: kfree(info); out_symbol: - symbol_put(dasd_biodasdinfo); + if (fn) + symbol_put(dasd_biodasdinfo); out_exit: return res; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index ef722f04f88a..72108404718f 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -651,6 +651,7 @@ struct compat_cdrom_generic_command { compat_int_t stat; compat_caddr_t sense; unsigned char data_direction; + unsigned char pad[3]; compat_int_t quiet; compat_int_t timeout; compat_caddr_t reserved[1]; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 04b6bde9419d..573dbf6f0c31 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1553,7 +1553,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa * put_page(); and would cause either a VM_BUG directly, or * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ - if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + if (drbd_disable_sendpage || !sendpage_ok(page)) return _drbd_no_send_page(peer_device, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index ffad73bb40ff..5a76611e684a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1320,9 +1320,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) } EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu); -static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +static int get_lower_dev_vlan(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - u16 *vlan_id = data; + u16 *vlan_id = (u16 *)priv->data; if (is_vlan_dev(lower_dev)) *vlan_id = vlan_dev_vlan_id(lower_dev); @@ -1348,6 +1349,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, u16 *vlan_id, u8 *smac) { + struct netdev_nested_priv priv = { + .data = (void *)vlan_id, + }; struct net_device *ndev; rcu_read_lock(); @@ -1368,7 +1372,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, * the lower vlan device for this gid entry. */ netdev_walk_all_lower_dev_rcu(attr->ndev, - get_lower_dev_vlan, vlan_id); + get_lower_dev_vlan, &priv); } } rcu_read_unlock(); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7f0e91e92968..5888311b2119 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2865,9 +2865,10 @@ struct iboe_prio_tc_map { bool found; }; -static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +static int get_lower_vlan_dev_tc(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct iboe_prio_tc_map *map = data; + struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data; if (is_vlan_dev(dev)) map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); @@ -2886,16 +2887,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) { struct iboe_prio_tc_map prio_tc_map = {}; int prio = rt_tos2priority(tos); + struct netdev_nested_priv priv; /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) return get_vlan_ndev_tc(ndev, prio); prio_tc_map.input_prio = prio; + priv.data = (void *)&prio_tc_map; rcu_read_lock(); netdev_walk_all_lower_dev_rcu(ndev, get_lower_vlan_dev_tc, - &prio_tc_map); + &priv); rcu_read_unlock(); /* If map is found from lower device, use it; Otherwise * continue with the current netdevice to get priority to tc map. diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 2860def84f4d..6b8364bb032d 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -531,10 +531,11 @@ struct upper_list { struct net_device *upper; }; -static int netdev_upper_walk(struct net_device *upper, void *data) +static int netdev_upper_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - struct list_head *upper_list = data; + struct list_head *upper_list = (struct list_head *)priv->data; if (!entry) return 0; @@ -553,12 +554,14 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, struct net_device *ndev)) { struct net_device *ndev = cookie; + struct netdev_nested_priv priv; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); + priv.data = &upper_list; rcu_read_lock(); - netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list); + netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ab75b7f745d4..f772fe8c5b66 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -342,9 +342,10 @@ struct ipoib_walk_data { struct net_device *result; }; -static int ipoib_upper_walk(struct net_device *upper, void *_data) +static int ipoib_upper_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - struct ipoib_walk_data *data = _data; + struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data; int ret = 0; if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) { @@ -368,10 +369,12 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data) static struct net_device *ipoib_get_net_dev_match_addr( const struct sockaddr *addr, struct net_device *dev) { + struct netdev_nested_priv priv; struct ipoib_walk_data data = { .addr = addr, }; + priv.data = (void *)&data; rcu_read_lock(); if (ipoib_is_dev_match_addr_rcu(addr, dev)) { dev_hold(dev); @@ -379,7 +382,7 @@ static struct net_device *ipoib_get_net_dev_match_addr( goto out; } - netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data); + netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv); out: rcu_read_unlock(); return data.result; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4e1b7deb724b..c3091e00dd5f 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -942,9 +942,10 @@ struct alb_walk_data { bool strict_match; }; -static int alb_upper_dev_walk(struct net_device *upper, void *_data) +static int alb_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - struct alb_walk_data *data = _data; + struct alb_walk_data *data = (struct alb_walk_data *)priv->data; bool strict_match = data->strict_match; struct bonding *bond = data->bond; struct slave *slave = data->slave; @@ -983,6 +984,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); + struct netdev_nested_priv priv; struct alb_walk_data data = { .strict_match = strict_match, .mac_addr = mac_addr, @@ -990,6 +992,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], .bond = bond, }; + priv.data = (void *)&data; /* send untagged */ alb_send_lp_vid(slave, mac_addr, 0, 0); @@ -997,7 +1000,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], * for that device. */ rcu_read_lock(); - netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data); + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv); rcu_read_unlock(); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 42ef25ec0af5..84ecbc6fa0ff 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1315,6 +1315,7 @@ static void bond_setup_by_slave(struct net_device *bond_dev, bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; + bond_dev->needed_headroom = slave_dev->needed_headroom; bond_dev->addr_len = slave_dev->addr_len; memcpy(bond_dev->broadcast, slave_dev->broadcast, @@ -2510,22 +2511,26 @@ re_arm: } } -static int bond_upper_dev_walk(struct net_device *upper, void *data) +static int bond_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - __be32 ip = *((__be32 *)data); + __be32 ip = *(__be32 *)priv->data; return ip == bond_confirm_addr(upper, 0, ip); } static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) return true; rcu_read_lock(); - if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip)) + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &priv)) ret = true; rcu_read_unlock(); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 8e755b50c9c1..c796d42730ba 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -387,8 +387,8 @@ EXPORT_SYMBOL(ksz_switch_alloc); int ksz_switch_register(struct ksz_device *dev, const struct ksz_dev_ops *ops) { + struct device_node *port, *ports; phy_interface_t interface; - struct device_node *port; unsigned int port_num; int ret; @@ -429,13 +429,17 @@ int ksz_switch_register(struct ksz_device *dev, ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) dev->compat_interface = interface; - for_each_available_child_of_node(dev->dev->of_node, port) { - if (of_property_read_u32(port, "reg", &port_num)) - continue; - if (port_num >= dev->port_cnt) - return -EINVAL; - of_get_phy_mode(port, &dev->ports[port_num].interface); - } + ports = of_get_child_by_name(dev->dev->of_node, "ports"); + if (ports) + for_each_available_child_of_node(ports, port) { + if (of_property_read_u32(port, "reg", + &port_num)) + continue; + if (port_num >= dev->port_cnt) + return -EINVAL; + of_get_phy_mode(port, + &dev->ports[port_num].interface); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); } diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 6855c94256f8..3a9637496407 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -685,12 +685,12 @@ static struct vcap_field vsc9959_vcap_is2_actions[] = { [VCAP_IS2_ACT_POLICE_ENA] = { 9, 1}, [VCAP_IS2_ACT_POLICE_IDX] = { 10, 9}, [VCAP_IS2_ACT_POLICE_VCAP_ONLY] = { 19, 1}, - [VCAP_IS2_ACT_PORT_MASK] = { 20, 11}, - [VCAP_IS2_ACT_REW_OP] = { 31, 9}, - [VCAP_IS2_ACT_SMAC_REPLACE_ENA] = { 40, 1}, - [VCAP_IS2_ACT_RSV] = { 41, 2}, - [VCAP_IS2_ACT_ACL_ID] = { 43, 6}, - [VCAP_IS2_ACT_HIT_CNT] = { 49, 32}, + [VCAP_IS2_ACT_PORT_MASK] = { 20, 6}, + [VCAP_IS2_ACT_REW_OP] = { 26, 9}, + [VCAP_IS2_ACT_SMAC_REPLACE_ENA] = { 35, 1}, + [VCAP_IS2_ACT_RSV] = { 36, 2}, + [VCAP_IS2_ACT_ACL_ID] = { 38, 6}, + [VCAP_IS2_ACT_HIT_CNT] = { 44, 32}, }; static const struct vcap_props vsc9959_vcap_props[] = { @@ -1171,6 +1171,8 @@ static int vsc9959_prevalidate_phy_mode(struct ocelot *ocelot, int port, */ static u16 vsc9959_wm_enc(u16 value) { + WARN_ON(value >= 16 * BIT(8)); + if (value >= BIT(8)) return BIT(8) | (value / 16); @@ -1284,8 +1286,28 @@ void vsc9959_mdio_bus_free(struct ocelot *ocelot) static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, u32 speed) { + u8 tas_speed; + + switch (speed) { + case SPEED_10: + tas_speed = OCELOT_SPEED_10; + break; + case SPEED_100: + tas_speed = OCELOT_SPEED_100; + break; + case SPEED_1000: + tas_speed = OCELOT_SPEED_1000; + break; + case SPEED_2500: + tas_speed = OCELOT_SPEED_2500; + break; + default: + tas_speed = OCELOT_SPEED_1000; + break; + } + ocelot_rmw_rix(ocelot, - QSYS_TAG_CONFIG_LINK_SPEED(speed), + QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), QSYS_TAG_CONFIG_LINK_SPEED_M, QSYS_TAG_CONFIG, port); } diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 29df0797ecf5..9e9fd19e1d00 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -706,7 +706,7 @@ static const struct vcap_props vsc9953_vcap_props[] = { .action_type_width = 1, .action_table = { [IS2_ACTION_TYPE_NORMAL] = { - .width = 44, + .width = 50, /* HIT_CNT not included */ .count = 2 }, [IS2_ACTION_TYPE_SMAC_SIP] = { @@ -911,6 +911,8 @@ static int vsc9953_prevalidate_phy_mode(struct ocelot *ocelot, int port, */ static u16 vsc9953_wm_enc(u16 value) { + WARN_ON(value >= 16 * BIT(9)); + if (value >= BIT(9)) return BIT(9) | (value / 16); diff --git a/drivers/net/ethernet/3com/typhoon.h b/drivers/net/ethernet/3com/typhoon.h index 2f634c64d5d1..38e6dcab4e94 100644 --- a/drivers/net/ethernet/3com/typhoon.h +++ b/drivers/net/ethernet/3com/typhoon.h @@ -33,7 +33,7 @@ struct basic_ring { u32 lastWrite; }; -/* The Typoon transmit ring -- same as a basic ring, plus: +/* The Typhoon transmit ring -- same as a basic ring, plus: * lastRead: where we're at in regard to cleaning up the ring * writeRegister: register to use for writing (different for Hi & Lo rings) */ diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile index 130a105d03f3..8ebcc68e807f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/Makefile +++ b/drivers/net/ethernet/aquantia/atlantic/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_AQTION) += atlantic.o -ccflags-y += -I$(src) +ccflags-y += -I$(srctree)/$(src) atlantic-objs := aq_main.o \ aq_nic.o \ @@ -33,4 +33,4 @@ atlantic-objs := aq_main.o \ atlantic-$(CONFIG_MACSEC) += aq_macsec.o -atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o
\ No newline at end of file +atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index bfc0e45d4a2b..5caa75b41b73 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -284,12 +284,12 @@ #define CCM_REG_GR_ARB_TYPE 0xd015c /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed; that the Store channel priority is - the compliment to 4 of the rest priorities - Aggregation channel; Load + the complement to 4 of the rest priorities - Aggregation channel; Load (FIC0) channel and Load (FIC1). */ #define CCM_REG_GR_LD0_PR 0xd0164 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed; that the Store channel priority is - the compliment to 4 of the rest priorities - Aggregation channel; Load + the complement to 4 of the rest priorities - Aggregation channel; Load (FIC0) channel and Load (FIC1). */ #define CCM_REG_GR_LD1_PR 0xd0168 /* [RW 2] General flags index. */ @@ -4489,11 +4489,11 @@ #define TCM_REG_GR_ARB_TYPE 0x50114 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define TCM_REG_GR_LD0_PR 0x5011c /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define TCM_REG_GR_LD1_PR 0x50120 /* [RW 4] The number of double REG-pairs; loaded from the STORM context and sent to STORM; for a specific connection type. The double REG-pairs are @@ -5020,11 +5020,11 @@ #define UCM_REG_GR_ARB_TYPE 0xe0144 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel group is - compliment to the others. */ + complement to the others. */ #define UCM_REG_GR_LD0_PR 0xe014c /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel group is - compliment to the others. */ + complement to the others. */ #define UCM_REG_GR_LD1_PR 0xe0150 /* [RW 2] The queue index for invalidate counter flag decision. */ #define UCM_REG_INV_CFLG_Q 0xe00e4 @@ -5523,11 +5523,11 @@ #define XCM_REG_GR_ARB_TYPE 0x2020c /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Channel group is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define XCM_REG_GR_LD0_PR 0x20214 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Channel group is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define XCM_REG_GR_LD1_PR 0x20218 /* [RW 1] Input nig0 Interface enable. If 0 - the valid input is disregarded; acknowledge output is deasserted; all other signals are diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 3e17ce0d2314..6cb2162a75d4 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1219,7 +1219,7 @@ static int octeon_mgmt_open(struct net_device *netdev) */ if (netdev->phydev) { netif_carrier_off(netdev); - phy_start_aneg(netdev->phydev); + phy_start(netdev->phydev); } netif_wake_queue(netdev); @@ -1247,8 +1247,10 @@ static int octeon_mgmt_stop(struct net_device *netdev) napi_disable(&p->napi); netif_stop_queue(netdev); - if (netdev->phydev) + if (netdev->phydev) { + phy_stop(netdev->phydev); phy_disconnect(netdev->phydev); + } netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 593e3812af93..3c06f5fb5759 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -11,9 +11,11 @@ #define DPNI_VER_MAJOR 7 #define DPNI_VER_MINOR 0 #define DPNI_CMD_BASE_VERSION 1 +#define DPNI_CMD_2ND_VERSION 2 #define DPNI_CMD_ID_OFFSET 4 #define DPNI_CMD(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION) +#define DPNI_CMD_V2(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_2ND_VERSION) #define DPNI_CMDID_OPEN DPNI_CMD(0x801) #define DPNI_CMDID_CLOSE DPNI_CMD(0x800) @@ -45,7 +47,7 @@ #define DPNI_CMDID_SET_MAX_FRAME_LENGTH DPNI_CMD(0x216) #define DPNI_CMDID_GET_MAX_FRAME_LENGTH DPNI_CMD(0x217) #define DPNI_CMDID_SET_LINK_CFG DPNI_CMD(0x21A) -#define DPNI_CMDID_SET_TX_SHAPING DPNI_CMD(0x21B) +#define DPNI_CMDID_SET_TX_SHAPING DPNI_CMD_V2(0x21B) #define DPNI_CMDID_SET_MCAST_PROMISC DPNI_CMD(0x220) #define DPNI_CMDID_GET_MCAST_PROMISC DPNI_CMD(0x221) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 98be51d8b08c..bfa2826c5545 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -229,7 +229,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) /* Return all Fs if nothing was there */ if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && !priv->has_a011043) { - dev_err(&bus->dev, + dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); return 0xffff; diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig index 936e2dd3bb13..b47bd5440c5f 100644 --- a/drivers/net/ethernet/huawei/hinic/Kconfig +++ b/drivers/net/ethernet/huawei/hinic/Kconfig @@ -6,6 +6,7 @@ config HINIC tristate "Huawei Intelligent PCIE Network Interface Card" depends on (PCI_MSI && (X86 || ARM64)) + select NET_DEVLINK help This driver supports HiNIC PCIE Ethernet cards. To compile this driver as part of the kernel, choose Y here. diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 02cd635d6914..eb97f2d6b1ad 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -58,9 +58,9 @@ static int change_mac(struct hinic_dev *nic_dev, const u8 *addr, sizeof(port_mac_cmd), &port_mac_cmd, &out_size); if (err || out_size != sizeof(port_mac_cmd) || - (port_mac_cmd.status && - port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY && - port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) { + (port_mac_cmd.status && + (port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY || !HINIC_IS_VF(hwif)) && + port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) { dev_err(&pdev->dev, "Failed to change MAC, err: %d, status: 0x%x, out size: 0x%x\n", err, port_mac_cmd.status, out_size); return -EFAULT; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index 4d63680f2143..f8a26459ff65 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -38,8 +38,7 @@ static int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr, err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_MAC, &mac_info, sizeof(mac_info), &mac_info, &out_size); if (err || out_size != sizeof(mac_info) || - (mac_info.status && mac_info.status != HINIC_PF_SET_VF_ALREADY && - mac_info.status != HINIC_MGMT_STATUS_EXIST)) { + (mac_info.status && mac_info.status != HINIC_MGMT_STATUS_EXIST)) { dev_err(&hwdev->func_to_io.hwif->pdev->dev, "Failed to set MAC, err: %d, status: 0x%x, out size: 0x%x\n", err, mac_info.status, out_size); return -EIO; @@ -503,8 +502,7 @@ struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev) static int hinic_check_mac_info(u8 status, u16 vlan_id) { - if ((status && status != HINIC_MGMT_STATUS_EXIST && - status != HINIC_PF_SET_VF_ALREADY) || + if ((status && status != HINIC_MGMT_STATUS_EXIST) || (vlan_id & CHECK_IPSU_15BIT && status == HINIC_MGMT_STATUS_EXIST)) return -EINVAL; @@ -546,12 +544,6 @@ static int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac, return -EINVAL; } - if (mac_info.status == HINIC_PF_SET_VF_ALREADY) { - dev_warn(&hwdev->hwif->pdev->dev, - "PF has already set VF MAC. Ignore update operation\n"); - return HINIC_PF_SET_VF_ALREADY; - } - if (mac_info.status == HINIC_MGMT_STATUS_EXIST) dev_warn(&hwdev->hwif->pdev->dev, "MAC is repeated. Ignore update operation\n"); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d870343cf689..cf539db79af9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3806,8 +3806,8 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) static int __maybe_unused iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); - struct iavf_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; + struct net_device *netdev = pci_get_drvdata(pdev); + struct iavf_adapter *adapter = netdev_priv(netdev); u32 err; pci_set_master(pdev); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 34abfcea9858..7db5fd977367 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2288,26 +2288,28 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) { struct ice_hw_func_caps *func_caps = &hw->func_caps; struct ice_hw_dev_caps *dev_caps = &hw->dev_caps; - u32 valid_func, rxq_first_id, txq_first_id; - u32 msix_vector_first_id, max_mtu; + struct ice_hw_common_caps cached_caps; u32 num_funcs; /* cache some func_caps values that should be restored after memset */ - valid_func = func_caps->common_cap.valid_functions; - txq_first_id = func_caps->common_cap.txq_first_id; - rxq_first_id = func_caps->common_cap.rxq_first_id; - msix_vector_first_id = func_caps->common_cap.msix_vector_first_id; - max_mtu = func_caps->common_cap.max_mtu; + cached_caps = func_caps->common_cap; /* unset func capabilities */ memset(func_caps, 0, sizeof(*func_caps)); +#define ICE_RESTORE_FUNC_CAP(name) \ + func_caps->common_cap.name = cached_caps.name + /* restore cached values */ - func_caps->common_cap.valid_functions = valid_func; - func_caps->common_cap.txq_first_id = txq_first_id; - func_caps->common_cap.rxq_first_id = rxq_first_id; - func_caps->common_cap.msix_vector_first_id = msix_vector_first_id; - func_caps->common_cap.max_mtu = max_mtu; + ICE_RESTORE_FUNC_CAP(valid_functions); + ICE_RESTORE_FUNC_CAP(txq_first_id); + ICE_RESTORE_FUNC_CAP(rxq_first_id); + ICE_RESTORE_FUNC_CAP(msix_vector_first_id); + ICE_RESTORE_FUNC_CAP(max_mtu); + ICE_RESTORE_FUNC_CAP(nvm_unified_update); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist); /* one Tx and one Rx queue in safe mode */ func_caps->common_cap.num_rxq = 1; @@ -2318,22 +2320,25 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) func_caps->guar_num_vsi = 1; /* cache some dev_caps values that should be restored after memset */ - valid_func = dev_caps->common_cap.valid_functions; - txq_first_id = dev_caps->common_cap.txq_first_id; - rxq_first_id = dev_caps->common_cap.rxq_first_id; - msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id; - max_mtu = dev_caps->common_cap.max_mtu; + cached_caps = dev_caps->common_cap; num_funcs = dev_caps->num_funcs; /* unset dev capabilities */ memset(dev_caps, 0, sizeof(*dev_caps)); +#define ICE_RESTORE_DEV_CAP(name) \ + dev_caps->common_cap.name = cached_caps.name + /* restore cached values */ - dev_caps->common_cap.valid_functions = valid_func; - dev_caps->common_cap.txq_first_id = txq_first_id; - dev_caps->common_cap.rxq_first_id = rxq_first_id; - dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id; - dev_caps->common_cap.max_mtu = max_mtu; + ICE_RESTORE_DEV_CAP(valid_functions); + ICE_RESTORE_DEV_CAP(txq_first_id); + ICE_RESTORE_DEV_CAP(rxq_first_id); + ICE_RESTORE_DEV_CAP(msix_vector_first_id); + ICE_RESTORE_DEV_CAP(max_mtu); + ICE_RESTORE_DEV_CAP(nvm_unified_update); + ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm); + ICE_RESTORE_DEV_CAP(nvm_update_pending_orom); + ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist); dev_caps->num_funcs = num_funcs; /* one Tx and one Rx queue per function in safe mode */ diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index deaefe00c9c0..8968fdd4816b 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -289,7 +289,13 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, return -EIO; } - err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, HZ, &event); + /* In most cases, firmware reports a write completion within a few + * milliseconds. However, it has been observed that a completion might + * take more than a second to complete in some cases. The timeout here + * is conservative and is intended to prevent failure to update when + * firmware is slow to respond. + */ + err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event); if (err) { dev_err(dev, "Timed out waiting for firmware write completion for module 0x%02x, err %d\n", module, err); @@ -513,7 +519,7 @@ static int ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags, return -EIO; } - err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, HZ, + err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ, &event); if (err) { dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f2682776f8c8..ebbb8f54871c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -246,7 +246,7 @@ static int ice_get_free_slot(void *array, int size, int curr) * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed */ -void ice_vsi_delete(struct ice_vsi *vsi) +static void ice_vsi_delete(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_vsi_ctx *ctxt; @@ -313,7 +313,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) * * Returns 0 on success, negative on failure */ -int ice_vsi_clear(struct ice_vsi *vsi) +static int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -563,7 +563,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) * ice_vsi_put_qs - Release queues from VSI to PF * @vsi: the VSI that is going to release queues */ -void ice_vsi_put_qs(struct ice_vsi *vsi) +static void ice_vsi_put_qs(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; int i; @@ -1196,6 +1196,18 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) { int i; + /* Avoid stale references by clearing map from vector to ring */ + if (vsi->q_vectors) { + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + if (q_vector) { + q_vector->tx.ring = NULL; + q_vector->rx.ring = NULL; + } + } + } + if (vsi->tx_rings) { for (i = 0; i < vsi->alloc_txq; i++) { if (vsi->tx_rings[i]) { @@ -2291,7 +2303,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (status) { dev_err(dev, "VSI %d failed lan queue config, error %s\n", vsi->vsi_num, ice_stat_str(status)); - goto unroll_vector_base; + goto unroll_clear_rings; } /* Add switch rule to drop all Tx Flow Control Frames, of look up diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 981f3a156c24..3da17895a2b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -45,10 +45,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); -void ice_vsi_delete(struct ice_vsi *vsi); - -int ice_vsi_clear(struct ice_vsi *vsi); - #ifdef CONFIG_DCB int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); #endif /* CONFIG_DCB */ @@ -79,8 +75,6 @@ bool ice_is_reset_in_progress(unsigned long *state); void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio); -void ice_vsi_put_qs(struct ice_vsi *vsi); - void ice_vsi_dis_irq(struct ice_vsi *vsi); void ice_vsi_free_irq(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4634b48949bb..54a7f55eb8c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3169,10 +3169,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf) return -EBUSY; vsi = ice_pf_vsi_setup(pf, pf->hw.port_info); - if (!vsi) { - status = -ENOMEM; - goto unroll_vsi_setup; - } + if (!vsi) + return -ENOMEM; status = ice_cfg_netdev(vsi); if (status) { @@ -3219,12 +3217,7 @@ unroll_napi_add: } unroll_vsi_setup: - if (vsi) { - ice_vsi_free_q_vectors(vsi); - ice_vsi_delete(vsi); - ice_vsi_put_qs(vsi); - ice_vsi_clear(vsi); - } + ice_vsi_release(vsi); return status; } @@ -4522,6 +4515,7 @@ static int __maybe_unused ice_suspend(struct device *dev) } ice_clear_interrupt_scheme(pf); + pci_save_state(pdev); pci_wake_from_d3(pdev, pf->wol_ena); pci_set_power_state(pdev, PCI_D3hot); return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2f8a4cfc5fa1..86ca8b9ea1b8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5396,9 +5396,10 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter, return err; } -static int ixgbe_macvlan_up(struct net_device *vdev, void *data) +static int ixgbe_macvlan_up(struct net_device *vdev, + struct netdev_nested_priv *priv) { - struct ixgbe_adapter *adapter = data; + struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data; struct ixgbe_fwd_adapter *accel; if (!netif_is_macvlan(vdev)) @@ -5415,8 +5416,12 @@ static int ixgbe_macvlan_up(struct net_device *vdev, void *data) static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) { + struct netdev_nested_priv priv = { + .data = (void *)adapter, + }; + netdev_walk_all_upper_dev_rcu(adapter->netdev, - ixgbe_macvlan_up, adapter); + ixgbe_macvlan_up, &priv); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -9023,9 +9028,10 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter) } #endif /* CONFIG_IXGBE_DCB */ -static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) +static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, + struct netdev_nested_priv *priv) { - struct ixgbe_adapter *adapter = data; + struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data; struct ixgbe_fwd_adapter *accel; int pool; @@ -9062,13 +9068,16 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) static void ixgbe_defrag_macvlan_pools(struct net_device *dev) { struct ixgbe_adapter *adapter = netdev_priv(dev); + struct netdev_nested_priv priv = { + .data = (void *)adapter, + }; /* flush any stale bits out of the fwd bitmask */ bitmap_clear(adapter->fwd_bitmask, 1, 63); /* walk through upper devices reassigning pools */ netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool, - adapter); + &priv); } /** @@ -9242,14 +9251,18 @@ struct upper_walk_data { u8 queue; }; -static int get_macvlan_queue(struct net_device *upper, void *_data) +static int get_macvlan_queue(struct net_device *upper, + struct netdev_nested_priv *priv) { if (netif_is_macvlan(upper)) { struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper); - struct upper_walk_data *data = _data; - struct ixgbe_adapter *adapter = data->adapter; - int ifindex = data->ifindex; + struct ixgbe_adapter *adapter; + struct upper_walk_data *data; + int ifindex; + data = (struct upper_walk_data *)priv->data; + ifindex = data->ifindex; + adapter = data->adapter; if (vadapter && upper->ifindex == ifindex) { data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; data->action = data->queue; @@ -9265,6 +9278,7 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, { struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; unsigned int num_vfs = adapter->num_vfs, vf; + struct netdev_nested_priv priv; struct upper_walk_data data; struct net_device *upper; @@ -9284,8 +9298,9 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, data.ifindex = ifindex; data.action = 0; data.queue = 0; + priv.data = (void *)&data; if (netdev_walk_all_upper_dev_rcu(adapter->netdev, - get_macvlan_queue, &data)) { + get_macvlan_queue, &priv)) { *action = data.action; *queue = data.queue; diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 635ff3a5dcfb..51ed8a54d380 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -245,6 +245,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) int pkts = 0; int bytes = 0; + netif_tx_lock(net_dev); while (pkts < budget) { struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free]; @@ -268,6 +269,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + netif_tx_unlock(net_dev); if (netif_queue_stopped(net_dev)) netif_wake_queue(net_dev); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c4345e3d616f..5bf0409f5d42 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3400,24 +3400,15 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, txq->last_desc = txq->size - 1; txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL); - if (!txq->buf) { - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->buf) return -ENOMEM; - } /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent, txq->size * TSO_HEADER_SIZE, &txq->tso_hdrs_phys, GFP_KERNEL); - if (!txq->tso_hdrs) { - kfree(txq->buf); - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->tso_hdrs) return -ENOMEM; - } /* Setup XPS mapping */ if (txq_number > 1) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 387e33fa417a..2718fe201c14 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -17,7 +17,7 @@ static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); -void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid) { void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; @@ -26,13 +26,21 @@ void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; - spin_lock(&mdev->mbox_lock); mdev->msg_size = 0; mdev->rsp_size = 0; tx_hdr->num_msgs = 0; tx_hdr->msg_size = 0; rx_hdr->num_msgs = 0; rx_hdr->msg_size = 0; +} +EXPORT_SYMBOL(__otx2_mbox_reset); + +void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +{ + struct otx2_mbox_dev *mdev = &mbox->dev[devid]; + + spin_lock(&mdev->mbox_lock); + __otx2_mbox_reset(mbox, devid); spin_unlock(&mdev->mbox_lock); } EXPORT_SYMBOL(otx2_mbox_reset); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6dfd0f90cd70..ab433789d2c3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -93,6 +93,7 @@ struct mbox_msghdr { }; void otx2_mbox_reset(struct otx2_mbox *mbox, int devid); +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid); void otx2_mbox_destroy(struct otx2_mbox *mbox); int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase, struct pci_dev *pdev, void __force *reg_base, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index dcf25a092008..b89dde2c8b08 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -463,6 +463,7 @@ void rvu_nix_freemem(struct rvu *rvu); int rvu_get_nixlf_count(struct rvu *rvu); void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf); int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr); +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -477,7 +478,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc); +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable); int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 01a793105599..0fc70824fd6b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -17,7 +17,6 @@ #include "npc.h" #include "cgx.h" -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id); @@ -2020,7 +2019,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list, return 0; } -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) { int err = 0, idx, next_idx, last_idx; struct nix_mce_list *mce_list; @@ -2065,7 +2064,7 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) /* Disable MCAM entry in NPC */ if (!mce_list->count) { - rvu_npc_disable_bcast_entry(rvu, pcifunc); + rvu_npc_enable_bcast_entry(rvu, pcifunc, false); goto end; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0a214084406a..fbaf9bcd83f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -530,7 +530,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, NIX_INTF_RX, &entry, true); } -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) { struct npc_mcam *mcam = &rvu->hw->mcam; int blkaddr, index; @@ -543,7 +543,7 @@ void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY); - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, @@ -622,23 +622,35 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, nixlf, NIXLF_UCAST_ENTRY); npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); - /* For PF, ena/dis promisc and bcast MCAM match entries */ - if (pcifunc & RVU_PFVF_FUNC_MASK) + /* For PF, ena/dis promisc and bcast MCAM match entries. + * For VFs add/delete from bcast list when RX multicast + * feature is present. + */ + if (pcifunc & RVU_PFVF_FUNC_MASK && !rvu->hw->cap.nix_rx_multicast) return; /* For bcast, enable/disable only if it's action is not * packet replication, incase if action is replication - * then this PF's nixlf is removed from bcast replication + * then this PF/VF's nixlf is removed from bcast replication * list. */ - index = npc_get_nixlf_mcam_index(mcam, pcifunc, + index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, NIXLF_BCAST_ENTRY); bank = npc_get_bank(mcam, index); *(u64 *)&action = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_ACTION(index & (mcam->banksize - 1), bank)); - if (action.op != NIX_RX_ACTIONOP_MCAST) + + /* VFs will not have BCAST entry */ + if (action.op != NIX_RX_ACTIONOP_MCAST && + !(pcifunc & RVU_PFVF_FUNC_MASK)) { npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); + } else { + nix_update_bcast_mce_list(rvu, pcifunc, enable); + /* Enable PF's BCAST entry for packet replication */ + rvu_npc_enable_bcast_entry(rvu, pcifunc, enable); + } + if (enable) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf); else diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 75a8c407e815..2fb45670aca4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -370,8 +370,8 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mbox = &pf->mbox; dst_size = dst_mbox->mbox.tx_size - ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN); - /* Check if msgs fit into destination area */ - if (mbox_hdr->msg_size > dst_size) + /* Check if msgs fit into destination area and has valid size */ + if (mbox_hdr->msg_size > dst_size || !mbox_hdr->msg_size) return -EINVAL; dst_mdev = &dst_mbox->mbox.dev[0]; @@ -526,10 +526,10 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - - otx2_mbox_reset(mbox, vf_idx); } static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) @@ -803,10 +803,11 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - otx2_mbox_reset(mbox, 0); } static void otx2_handle_link_event(struct otx2_nic *pf) @@ -1560,10 +1561,13 @@ int otx2_open(struct net_device *netdev) err = otx2_rxtx_enable(pf, true); if (err) - goto err_free_cints; + goto err_tx_stop_queues; return 0; +err_tx_stop_queues: + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 3a5b34a2a7a6..e46834e043be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -524,6 +524,7 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM; } else if (skb->protocol == htons(ETH_P_IPV6)) { proto = ipv6_hdr(skb)->nexthdr; + sqe_hdr->ol3type = NIX_SENDL3TYPE_IP6; } if (proto == IPPROTO_TCP) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 92a3db69a6cd..2f90f1721441 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -99,10 +99,10 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work) msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - - otx2_mbox_reset(mbox, 0); } static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1d91a0d0ab1d..2d1f4b3be9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -69,12 +69,10 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; -static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, - struct mlx5_cmd_msg *in, - struct mlx5_cmd_msg *out, - void *uout, int uout_size, - mlx5_cmd_cbk_t cbk, - void *context, int page_queue) +static struct mlx5_cmd_work_ent * +cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, void *context, int page_queue) { gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; struct mlx5_cmd_work_ent *ent; @@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, if (!ent) return ERR_PTR(-ENOMEM); + ent->idx = -EINVAL; ent->in = in; ent->out = out; ent->uout = uout; @@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, ent->context = context; ent->cmd = cmd; ent->page_queue = page_queue; + refcount_set(&ent->refcnt, 1); return ent; } +static void cmd_free_ent(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + static u8 alloc_token(struct mlx5_cmd *cmd) { u8 token; @@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int alloc_ent(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd) { unsigned long flags; int ret; @@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd) return ret < cmd->max_reg_cmds ? ret : -ENOMEM; } -static void free_ent(struct mlx5_cmd *cmd, int idx) +static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { unsigned long flags; @@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } +static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) +{ + refcount_inc(&ent->refcnt); +} + +static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) +{ + if (!refcount_dec_and_test(&ent->refcnt)) + return; + + if (ent->idx >= 0) + cmd_free_index(ent->cmd, ent->idx); + + cmd_free_ent(ent); +} + static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) { return cmd->cmd_buf + (idx << cmd->log_stride); @@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent) ent->ret = -ETIMEDOUT; } -static void free_cmd(struct mlx5_cmd_work_ent *ent) -{ - kfree(ent); -} - static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_mailbox *next = ent->out->next; @@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + goto out; /* phew, already handled */ + } + ent->ret = -ETIMEDOUT; - mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + +out: + cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); @@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } +static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) +{ + unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); + int idx; + +retry: + idx = cmd_alloc_index(cmd); + if (idx < 0 && time_before(jiffies, alloc_end)) { + /* Index allocation can fail on heavy load of commands. This is a temporary + * situation as the current command already holds the semaphore, meaning that + * another command completion is being handled and it is expected to release + * the entry index soon. + */ + cpu_relax(); + goto retry; + } + return idx; +} + +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) +{ + return pci_channel_offline(dev->pdev) || + dev->cmd.state != MLX5_CMDIF_STATE_UP || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = alloc_ent(cmd); + alloc_ret = cmd_alloc_index_retry(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { ent->callback(-EAGAIN, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); - free_cmd(ent); + cmd_ent_put(ent); } else { ent->ret = -EAGAIN; complete(&ent->done); @@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work) ent->ts1 = ktime_get_ns(); cmd_mode = cmd->mode; - if (ent->callback) - schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout)) + cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); /* Skip sending command to fw if internal error */ - if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || - cmd->state != MLX5_CMDIF_STATE_UP || - !opcode_allowed(&dev->cmd, ent->op)) { + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work) MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); - /* no doorbell, no need to keep the entry */ - free_ent(cmd, ent->idx); - if (ent->callback) - free_cmd(ent); return; } + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status) } } +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the + * recovery flow (or any other recovery flow running simultaneously) + * has recovered an EQE, it should cause the entry to be completed by + * the command interface. + */ + if (wait_for_completion_timeout(&ent->done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +} + static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) ent->ret = -ECANCELED; goto out_err; } - if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) wait_for_completion(&ent->done); - } else if (!wait_for_completion_timeout(&ent->done, timeout)) { - ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); - } + else if (!wait_for_completion_timeout(&ent->done, timeout)) + wait_func_handle_exec_timeout(dev, ent); out_err: err = ent->ret; @@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (callback && page_queue) return -EINVAL; - ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context, - page_queue); + ent = cmd_alloc_ent(cmd, in, out, uout, uout_size, + callback, context, page_queue); if (IS_ERR(ent)) return PTR_ERR(ent); + /* put for this ent is when consumed, depending on the use case + * 1) (!callback) blocking flow: by caller after wait_func completes + * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled + */ + ent->token = token; ent->polling = force_polling; @@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, } if (callback) - goto out; + goto out; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - if (err == -ECANCELED) + if (err == -ETIMEDOUT || err == -ECANCELED) goto out_free; ds = ent->ts2 - ent->ts1; @@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, *status = ent->status; out_free: - free_cmd(ent); + cmd_ent_put(ent); out: return err; } @@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force if (!forced) { mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", ent->idx); - free_ent(cmd, ent->idx); - free_cmd(ent); + cmd_ent_put(ent); } continue; } - if (ent->callback) - cancel_delayed_work(&ent->cb_timeout_work); + if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work)) + cmd_ent_put(ent); /* timeout work was canceled */ + + if (!forced || /* Real FW completion */ + pci_channel_offline(dev->pdev) || /* FW is inaccessible */ + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + cmd_ent_put(ent); + if (ent->page_queue) sem = &cmd->pages_sem; else @@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force ent->ret, deliv_status_to_str(ent->status), ent->status); } - /* only real completion will free the entry slot */ - if (!forced) - free_ent(cmd, ent->idx); - if (ent->callback) { ds = ent->ts2 - ent->ts1; if (ent->op < MLX5_CMD_OP_MAX) { @@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force free_msg(dev, ent->in); err = err ? err : ent->status; - if (!forced) - free_cmd(ent); + /* final consumer is done, release ent */ + cmd_ent_put(ent); callback(err, context); } else { + /* release wait_func() so mlx5_cmd_invoke() + * can make the final ent_put() + */ complete(&ent->done); } up(sem); @@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) { + struct mlx5_cmd *cmd = &dev->cmd; + unsigned long bitmask; unsigned long flags; u64 vector; + int i; /* wait for pending handlers to complete */ mlx5_eq_synchronize_cmd_irq(dev); @@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) if (!vector) goto no_trig; + bitmask = vector; + /* we must increment the allocated entries refcount before triggering the completions + * to guarantee pending commands will not get freed in the meanwhile. + * For that reason, it also has to be done inside the alloc_lock. + */ + for_each_set_bit(i, &bitmask, (1 << cmd->log_sz)) + cmd_ent_get(cmd->ent_arr[i]); vector |= MLX5_TRIGGERED_CMD_COMP; spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); mlx5_cmd_comp_handler(dev, vector, true); + for_each_set_bit(i, &bitmask, (1 << cmd->log_sz)) + cmd_ent_put(cmd->ent_arr[i]); return; no_trig: @@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, u8 token; opcode = MLX5_GET(mbox_in, in, opcode); - if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || - dev->cmd.state != MLX5_CMDIF_STATE_UP || - !opcode_allowed(&dev->cmd, opcode)) { + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 90d5caabd6af..356f5852955f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -91,7 +91,12 @@ struct page_pool; #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between + * WQEs, This page will absorb write overflow by the hardware, when + * receiving packets larger than MTU. These oversize packets are + * dropped by the driver at a later stage. + */ +#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) #define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ @@ -617,6 +622,7 @@ struct mlx5e_rq { u32 rqn; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; + struct mlx5e_dma_info wqe_overflow; /* XDP read-mostly */ struct xdp_rxq_info xdp_rxq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 96608dbb9314..308fd279669e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -569,6 +569,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) return -EOPNOTSUPP; + if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy)) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index 906292035088..58e27038c947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -110,11 +110,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work) rtnl_unlock(); } +struct neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct mlx5e_neigh_hash_entry *nhe; +}; + +static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) +{ + neigh_release(update_work->n); + mlx5e_rep_neigh_entry_release(update_work->nhe); + kfree(update_work); +} + static void mlx5e_rep_neigh_update(struct work_struct *work) { - struct mlx5e_neigh_hash_entry *nhe = - container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); - struct neighbour *n = nhe->n; + struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, + work); + struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; + struct neighbour *n = update_work->n; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; @@ -146,30 +160,42 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } - mlx5e_rep_neigh_entry_release(nhe); rtnl_unlock(); - neigh_release(n); + mlx5e_release_neigh_update_work(update_work); } -static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe, - struct neighbour *n) +static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, + struct neighbour *n) { - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); + struct neigh_update_work *update_work; + struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh m_neigh = {}; - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (WARN_ON(!update_work)) + return NULL; - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* Obtain reference to nhe as last step in order not to release it in + * atomic context. + */ + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) { + kfree(update_work); + return NULL; } + + INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->nhe = nhe; + + return update_work; } static int mlx5e_rep_netevent_event(struct notifier_block *nb, @@ -181,7 +207,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; - struct mlx5e_neigh m_neigh = {}; + struct neigh_update_work *update_work; struct neigh_parms *p; struct neighbour *n; bool found = false; @@ -196,17 +222,11 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, #endif return NOTIFY_DONE; - m_neigh.dev = n->dev; - m_neigh.family = n->ops->family; - memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - - rcu_read_lock(); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - rcu_read_unlock(); - if (!nhe) + update_work = mlx5e_alloc_neigh_update_work(priv, n); + if (!update_work) return NOTIFY_DONE; - mlx5e_rep_queue_neigh_update_work(priv, nhe, n); + queue_work(priv->wq, &update_work->work); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -352,7 +372,6 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); - INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index bc5f72ec3623..a8be40cbe325 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -246,8 +246,10 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, case FLOW_ACT_MANGLE_HDR_TYPE_IP6: ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); ip6_offset /= 4; - if (ip6_offset < 8) + if (ip6_offset < 4) tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); + else if (ip6_offset < 8) + tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); else return -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 64d002d92250..1f48f99c0997 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } + if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type)) + return 0; + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { @@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } @@ -415,8 +417,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); + + /* must be called after DESTROY bit is set and + * set_rx_mode is called and flushed + */ + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b3cda7b6e5e1..42ec28e29834 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -246,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, - struct mlx5_core_mkey *umr_mkey) + struct mlx5_core_mkey *umr_mkey, + dma_addr_t filler_addr) { - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_mtt *mtt; + int inlen; void *mkc; u32 *in; int err; + int i; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) @@ -271,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + MLX5_MTT_OCTW(npages)); + + /* Initialize the mkey with all MTTs pointing to a default + * page (filler_addr). When the channels are activated, UMR + * WQEs will redirect the RX WQEs to the actual memory from + * the RQ's pool, while the gaps (wqe_overflow) remain mapped + * to the default page. + */ + mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0 ; i < npages ; i++) + mtt[i].ptag = cpu_to_be64(filler_addr); err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); @@ -282,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq { u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); - return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey, + rq->wqe_overflow.addr); } static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) @@ -350,6 +368,28 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_rq_cqe_err(rq); } +static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + rq->wqe_overflow.page = alloc_page(GFP_KERNEL); + if (!rq->wqe_overflow.page) + return -ENOMEM; + + rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, + PAGE_SIZE, rq->buff.map_dir); + if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { + __free_page(rq->wqe_overflow.page); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, + rq->buff.map_dir); + __free_page(rq->wqe_overflow.page); +} + static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -396,7 +436,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) - goto err_rq_wq_destroy; + goto err_rq_xdp_prog; rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); @@ -407,6 +447,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) + goto err_rq_xdp; + + err = mlx5e_alloc_mpwqe_rq_drop_page(rq); + if (err) goto err_rq_wq_destroy; rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; @@ -424,18 +468,18 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) - goto err_rq_wq_destroy; + goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) - goto err_free; + goto err_rq_mkey; break; default: /* MLX5_WQ_TYPE_CYCLIC */ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_wq_destroy; + goto err_rq_xdp; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; @@ -450,19 +494,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->wqe.frags) { err = -ENOMEM; - goto err_free; + goto err_rq_wq_destroy; } err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) - goto err_free; + goto err_rq_frags; rq->mkey_be = c->mkey_be; } err = mlx5e_rq_set_handlers(rq, params, xsk); if (err) - goto err_free; + goto err_free_by_rq_type; if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, @@ -486,13 +530,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (IS_ERR(rq->page_pool)) { err = PTR_ERR(rq->page_pool); rq->page_pool = NULL; - goto err_free; + goto err_free_by_rq_type; } err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) - goto err_free; + goto err_free_by_rq_type; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -542,23 +586,27 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, return 0; -err_free: +err_free_by_rq_type: switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); +err_rq_mkey: mlx5_core_destroy_mkey(mdev, &rq->umr_mkey); +err_rq_drop_page: + mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - kvfree(rq->wqe.frags); mlx5e_free_di_list(rq); +err_rq_frags: + kvfree(rq->wqe.frags); } - err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); +err_rq_xdp: + xdp_rxq_info_unreg(&rq->xdp_rxq); +err_rq_xdp_prog: if (params->xdp_prog) bpf_prog_put(params->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - page_pool_destroy(rq->page_pool); - mlx5_wq_destroy(&rq->wq_ctrl); return err; } @@ -580,6 +628,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); + mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ kvfree(rq->wqe.frags); @@ -4177,6 +4226,21 @@ int mlx5e_get_vf_stats(struct net_device *dev, } #endif +static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, + struct sk_buff *skb) +{ + switch (skb->inner_protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_TEB): + return true; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): + return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); + } + return false; +} + static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) @@ -4199,7 +4263,9 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, switch (proto) { case IPPROTO_GRE: - return features; + if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) + return features; + break; case IPPROTO_IPIP: case IPPROTO_IPV6: if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 622c27ae4ac7..0d1562e20118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -135,12 +135,6 @@ struct mlx5e_neigh_hash_entry { /* encap list sharing the same neigh */ struct list_head encap_list; - /* valid only when the neigh reference is taken during - * neigh_update_work workqueue callback. - */ - struct neighbour *n; - struct work_struct neigh_update_work; - /* neigh hash entry can be deleted only when the refcount is zero. * refcount is needed to avoid neigh hash entry removal by TC, while * it's used by the neigh notification call. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 31ef9f8420c8..22a19d391e17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -189,6 +189,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) return count_eqe; } +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags) + __acquires(&eq->lock) +{ + if (in_irq()) + spin_lock(&eq->lock); + else + spin_lock_irqsave(&eq->lock, *flags); +} + +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags) + __releases(&eq->lock) +{ + if (in_irq()) + spin_unlock(&eq->lock); + else + spin_unlock_irqrestore(&eq->lock, *flags); +} + +enum async_eq_nb_action { + ASYNC_EQ_IRQ_HANDLER = 0, + ASYNC_EQ_RECOVER = 1, +}; + static int mlx5_eq_async_int(struct notifier_block *nb, unsigned long action, void *data) { @@ -198,11 +221,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb, struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; + unsigned long flags; int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; + mlx5_eq_async_int_lock(eq_async, &flags); + eqe = next_eqe_sw(eq); if (!eqe) goto out; @@ -223,8 +249,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb, out: eq_update_ci(eq, 1); + mlx5_eq_async_int_unlock(eq_async, &flags); - return 0; + return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0; +} + +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; + int eqes; + + eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); + if (eqes) + mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); } static void init_eq_buf(struct mlx5_eq *eq) @@ -569,6 +606,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, int err; eq->irq_nb.notifier_call = mlx5_eq_async_int; + spin_lock_init(&eq->lock); err = create_async_eq(dev, &eq->core, param); if (err) { @@ -656,8 +694,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, &table->pages_eq, "pages"); cleanup_async_eq(dev, &table->async_eq, "async"); + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, &table->cmd_eq, "cmd"); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 4aaca7400fb2..5c681e31983b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -37,6 +37,7 @@ struct mlx5_eq { struct mlx5_eq_async { struct mlx5_eq core; struct notifier_block irq_nb; + spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */ }; struct mlx5_eq_comp { @@ -81,6 +82,7 @@ void mlx5_cq_tasklet_cb(unsigned long data); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f9b798af6335..c0e18f2ade99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 npages; u32 i = 0; - if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (!mlx5_cmd_is_down(dev)) return mlx5_cmd_exec(dev, in, in_size, out, out_size); /* No hard feelings, we want our pages back! */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 373981a659c7..6fd974920394 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) return 0; err_request_irq: - for (; i >= 0; i--) { + while (i--) { struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4186e29119c2..f3c0e241e1b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3690,13 +3690,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } -static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) +static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - struct mlxsw_sp_port **p_mlxsw_sp_port = data; int ret = 0; if (mlxsw_sp_port_dev_check(lower_dev)) { - *p_mlxsw_sp_port = netdev_priv(lower_dev); + priv->data = (void *)netdev_priv(lower_dev); ret = 1; } @@ -3705,15 +3705,16 @@ static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct mlxsw_sp_port *mlxsw_sp_port; + struct netdev_nested_priv priv = { + .data = NULL, + }; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - mlxsw_sp_port = NULL; - netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port); + netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &priv); - return mlxsw_sp_port; + return (struct mlxsw_sp_port *)priv.data; } struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) @@ -3726,16 +3727,17 @@ struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct mlxsw_sp_port *mlxsw_sp_port; + struct netdev_nested_priv priv = { + .data = NULL, + }; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - mlxsw_sp_port = NULL; netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk, - &mlxsw_sp_port); + &priv); - return mlxsw_sp_port; + return (struct mlxsw_sp_port *)priv.data; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 5c020403342f..7cccc41dd69c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -292,13 +292,14 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam, int err; group->tcam = tcam; - mutex_init(&group->lock); INIT_LIST_HEAD(&group->region_list); err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); if (err) return err; + mutex_init(&group->lock); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 24f1fd1f8d56..460cb523312f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7351,9 +7351,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } -static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) +static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct mlxsw_sp_rif *rif = data; + struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data; if (!netif_is_macvlan(dev)) return 0; @@ -7364,12 +7365,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) { + struct netdev_nested_priv priv = { + .data = (void *)rif, + }; + if (!netif_is_macvlan_port(rif->dev)) return 0; netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n"); return netdev_walk_all_upper_dev_rcu(rif->dev, - __mlxsw_sp_rif_macvlan_flush, rif); + __mlxsw_sp_rif_macvlan_flush, &priv); } static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 72912afa6f72..6501ce94ace5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -136,9 +136,9 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, - void *data) + struct netdev_nested_priv *priv) { - struct mlxsw_sp *mlxsw_sp = data; + struct mlxsw_sp *mlxsw_sp = priv->data; mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); return 0; @@ -147,10 +147,14 @@ static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, struct net_device *dev) { + struct netdev_nested_priv priv = { + .data = (void *)mlxsw_sp, + }; + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); netdev_walk_all_upper_dev_rcu(dev, mlxsw_sp_bridge_device_upper_rif_destroy, - mlxsw_sp); + &priv); } static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 8518e1d60da4..aa002db04250 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1253,7 +1253,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) struct ocelot_port *ocelot_port = ocelot->ports[port]; int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN; int pause_start, pause_stop; - int atop_wm; + int atop, atop_tot; if (port == ocelot->npi) { maxlen += OCELOT_TAG_LEN; @@ -1274,12 +1274,12 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_STOP, pause_stop); - /* Tail dropping watermark */ - atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / + /* Tail dropping watermarks */ + atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) / OCELOT_BUFFER_CELL_SZ; - ocelot_write_rix(ocelot, ocelot->ops->wm_enc(9 * maxlen), - SYS_ATOP, port); - ocelot_write(ocelot, ocelot->ops->wm_enc(atop_wm), SYS_ATOP_TOT_CFG); + atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ; + ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port); + ocelot_write(ocelot, ocelot->ops->wm_enc(atop_tot), SYS_ATOP_TOT_CFG); } EXPORT_SYMBOL(ocelot_port_set_maxlen); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index dfb1535f26f2..8a6917691ba6 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -745,6 +745,8 @@ static int ocelot_reset(struct ocelot *ocelot) */ static u16 ocelot_wm_enc(u16 value) { + WARN_ON(value >= 16 * BIT(8)); + if (value >= BIT(8)) return BIT(8) | (value / 16); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fc9e6626db55..11e6962a18e4 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2058,11 +2058,18 @@ static void rtl_release_firmware(struct rtl8169_private *tp) void r8169_apply_firmware(struct rtl8169_private *tp) { + int val; + /* TODO: release firmware if rtl_fw_write_firmware signals failure. */ if (tp->rtl_fw) { rtl_fw_write_firmware(tp, tp->rtl_fw); /* At least one firmware doesn't reset tp->ocp_base. */ tp->ocp_base = OCP_STD_PHY_BASE; + + /* PHY soft reset may still be in progress */ + phy_read_poll_timeout(tp->phydev, MII_BMCR, val, + !(val & BMCR_RESET), + 50000, 600000, true); } } @@ -2239,14 +2246,10 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) default: break; } - - clk_disable_unprepare(tp->clk); } static void rtl_pll_power_up(struct rtl8169_private *tp) { - clk_prepare_enable(tp->clk); - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: @@ -2904,7 +2907,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) { 0x08, 0x0001, 0x0002 }, { 0x09, 0x0000, 0x0080 }, { 0x19, 0x0000, 0x0224 }, - { 0x00, 0x0000, 0x0004 }, + { 0x00, 0x0000, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; @@ -2921,7 +2924,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) { 0x06, 0x00c0, 0x0020 }, { 0x0f, 0xffff, 0x5200 }, { 0x19, 0x0000, 0x0224 }, - { 0x00, 0x0000, 0x0004 }, + { 0x00, 0x0000, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; @@ -4826,29 +4829,43 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp) #ifdef CONFIG_PM +static int rtl8169_net_resume(struct rtl8169_private *tp) +{ + rtl_rar_set(tp, tp->dev->dev_addr); + + if (tp->TxDescArray) + rtl8169_up(tp); + + netif_device_attach(tp->dev); + + return 0; +} + static int __maybe_unused rtl8169_suspend(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); rtnl_lock(); rtl8169_net_suspend(tp); + if (!device_may_wakeup(tp_to_dev(tp))) + clk_disable_unprepare(tp->clk); rtnl_unlock(); return 0; } -static int rtl8169_resume(struct device *device) +static int __maybe_unused rtl8169_resume(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); - rtl_rar_set(tp, tp->dev->dev_addr); + if (!device_may_wakeup(tp_to_dev(tp))) + clk_prepare_enable(tp->clk); - if (tp->TxDescArray) - rtl8169_up(tp); + /* Reportedly at least Asus X453MA truncates packets otherwise */ + if (tp->mac_version == RTL_GIGA_MAC_VER_37) + rtl_init_rxcfg(tp); - netif_device_attach(tp->dev); - - return 0; + return rtl8169_net_resume(tp); } static int rtl8169_runtime_suspend(struct device *device) @@ -4874,7 +4891,7 @@ static int rtl8169_runtime_resume(struct device *device) __rtl8169_set_wol(tp, tp->saved_wolopts); - return rtl8169_resume(device); + return rtl8169_net_resume(tp); } static int rtl8169_runtime_idle(struct device *device) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index df89d09b253e..99f7aae102ce 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1342,51 +1342,6 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler, return error; } -/* MDIO bus init function */ -static int ravb_mdio_init(struct ravb_private *priv) -{ - struct platform_device *pdev = priv->pdev; - struct device *dev = &pdev->dev; - int error; - - /* Bitbang init */ - priv->mdiobb.ops = &bb_ops; - - /* MII controller setting */ - priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb); - if (!priv->mii_bus) - return -ENOMEM; - - /* Hook up MII support for ethtool */ - priv->mii_bus->name = "ravb_mii"; - priv->mii_bus->parent = dev; - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); - - /* Register MDIO bus */ - error = of_mdiobus_register(priv->mii_bus, dev->of_node); - if (error) - goto out_free_bus; - - return 0; - -out_free_bus: - free_mdio_bitbang(priv->mii_bus); - return error; -} - -/* MDIO bus release function */ -static int ravb_mdio_release(struct ravb_private *priv) -{ - /* Unregister mdio bus */ - mdiobus_unregister(priv->mii_bus); - - /* Free bitbang info */ - free_mdio_bitbang(priv->mii_bus); - - return 0; -} - /* Network device open function for Ethernet AVB */ static int ravb_open(struct net_device *ndev) { @@ -1395,13 +1350,6 @@ static int ravb_open(struct net_device *ndev) struct device *dev = &pdev->dev; int error; - /* MDIO bus init */ - error = ravb_mdio_init(priv); - if (error) { - netdev_err(ndev, "failed to initialize MDIO\n"); - return error; - } - napi_enable(&priv->napi[RAVB_BE]); napi_enable(&priv->napi[RAVB_NC]); @@ -1479,7 +1427,6 @@ out_free_irq: out_napi_off: napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); - ravb_mdio_release(priv); return error; } @@ -1789,8 +1736,6 @@ static int ravb_close(struct net_device *ndev) ravb_ring_free(ndev, RAVB_BE); ravb_ring_free(ndev, RAVB_NC); - ravb_mdio_release(priv); - return 0; } @@ -1942,6 +1887,51 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_set_features = ravb_set_features, }; +/* MDIO bus init function */ +static int ravb_mdio_init(struct ravb_private *priv) +{ + struct platform_device *pdev = priv->pdev; + struct device *dev = &pdev->dev; + int error; + + /* Bitbang init */ + priv->mdiobb.ops = &bb_ops; + + /* MII controller setting */ + priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb); + if (!priv->mii_bus) + return -ENOMEM; + + /* Hook up MII support for ethtool */ + priv->mii_bus->name = "ravb_mii"; + priv->mii_bus->parent = dev; + snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + + /* Register MDIO bus */ + error = of_mdiobus_register(priv->mii_bus, dev->of_node); + if (error) + goto out_free_bus; + + return 0; + +out_free_bus: + free_mdio_bitbang(priv->mii_bus); + return error; +} + +/* MDIO bus release function */ +static int ravb_mdio_release(struct ravb_private *priv) +{ + /* Unregister mdio bus */ + mdiobus_unregister(priv->mii_bus); + + /* Free bitbang info */ + free_mdio_bitbang(priv->mii_bus); + + return 0; +} + static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, @@ -2184,6 +2174,13 @@ static int ravb_probe(struct platform_device *pdev) eth_hw_addr_random(ndev); } + /* MDIO bus init */ + error = ravb_mdio_init(priv); + if (error) { + dev_err(&pdev->dev, "failed to initialize MDIO\n"); + goto out_dma_free; + } + netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll, 64); netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll, 64); @@ -2205,6 +2202,8 @@ static int ravb_probe(struct platform_device *pdev) out_napi_del: netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); +out_dma_free: dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); @@ -2236,6 +2235,7 @@ static int ravb_remove(struct platform_device *pdev) unregister_netdev(ndev); netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); pm_runtime_disable(&pdev->dev); free_netdev(ndev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 42458a46ffaf..9cc31f7e0df1 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -3099,9 +3099,10 @@ struct rocker_walk_data { struct rocker_port *port; }; -static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) +static int rocker_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - struct rocker_walk_data *data = _data; + struct rocker_walk_data *data = (struct rocker_walk_data *)priv->data; int ret = 0; if (rocker_port_dev_check_under(lower_dev, data->rocker)) { @@ -3115,6 +3116,7 @@ static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, struct rocker *rocker) { + struct netdev_nested_priv priv; struct rocker_walk_data data; if (rocker_port_dev_check_under(dev, rocker)) @@ -3122,7 +3124,8 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, data.rocker = rocker; data.port = NULL; - netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data); + priv.data = (void *)&data; + netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &priv); return data.port; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 2ac9dfb3462c..9e6d60e75f85 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -653,7 +653,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdev); - clk_disable_unprepare(priv->plat->stmmac_clk); clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 9c02fc754bf1..545696971f65 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -203,6 +203,8 @@ struct stmmac_priv { int eee_enabled; int eee_active; int tx_lpi_timer; + int tx_lpi_enabled; + int eee_tw_timer; unsigned int mode; unsigned int chain_mode; int extend_desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index ac5e8cc5fb9f..814879f91f76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -665,6 +665,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_enabled = priv->eee_enabled; edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; + edata->tx_lpi_enabled = priv->tx_lpi_enabled; return phylink_ethtool_get_eee(priv->phylink, edata); } @@ -675,24 +676,26 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct stmmac_priv *priv = netdev_priv(dev); int ret; - if (!edata->eee_enabled) { + if (!priv->dma_cap.eee) + return -EOPNOTSUPP; + + if (priv->tx_lpi_enabled != edata->tx_lpi_enabled) + netdev_warn(priv->dev, + "Setting EEE tx-lpi is not supported\n"); + + if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); - } else { - /* We are asking for enabling the EEE but it is safe - * to verify all by invoking the eee_init function. - * In case of failure it will return an error. - */ - edata->eee_enabled = stmmac_eee_init(priv); - if (!edata->eee_enabled) - return -EOPNOTSUPP; - } ret = phylink_ethtool_set_eee(priv->phylink, edata); if (ret) return ret; - priv->eee_enabled = edata->eee_enabled; - priv->tx_lpi_timer = edata->tx_lpi_timer; + if (edata->eee_enabled && + priv->tx_lpi_timer != edata->tx_lpi_timer) { + priv->tx_lpi_timer = edata->tx_lpi_timer; + stmmac_eee_init(priv); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 89b2b3472852..b56b13d64ab4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -94,7 +94,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | static int eee_timer = STMMAC_DEFAULT_LPI_TIMER; module_param(eee_timer, int, 0644); MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec"); -#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x)) +#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x)) /* By default the driver will use the ring mode to manage tx and rx descriptors, * but allow user to force to use the chain instead of the ring @@ -370,7 +370,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer); stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /** @@ -383,7 +383,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - int tx_lpi_timer = priv->tx_lpi_timer; + int eee_tw_timer = priv->eee_tw_timer; /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. @@ -403,7 +403,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_enabled) { netdev_dbg(priv->dev, "disable EEE\n"); del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer); + stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer); } mutex_unlock(&priv->lock); return false; @@ -411,11 +411,12 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_active && !priv->eee_enabled) { timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, - tx_lpi_timer); + eee_tw_timer); } + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + mutex_unlock(&priv->lock); netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); return true; @@ -930,6 +931,7 @@ static void stmmac_mac_link_down(struct phylink_config *config, stmmac_mac_set(priv, priv->ioaddr, false); priv->eee_active = false; + priv->tx_lpi_enabled = false; stmmac_eee_init(priv); stmmac_set_eee_pls(priv, priv->hw, false); } @@ -1027,6 +1029,7 @@ static void stmmac_mac_link_up(struct phylink_config *config, if (phy && priv->dma_cap.eee) { priv->eee_active = phy_init_eee(phy, 1) >= 0; priv->eee_enabled = stmmac_eee_init(priv); + priv->tx_lpi_enabled = priv->eee_enabled; stmmac_set_eee_pls(priv, priv->hw, true); } } @@ -2061,7 +2064,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) { stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /* We still have pending packets, let's call for a new scheduling */ @@ -2694,7 +2697,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) netdev_warn(priv->dev, "PTP init failed\n"); } - priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; + priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; + + /* Convert the timer from msec to usec */ + if (!priv->tx_lpi_timer) + priv->tx_lpi_timer = eee_timer * 1000; if (priv->use_riwt) { if (!priv->rx_riwt) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 803247d51fe9..55b0ddab1776 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2,7 +2,7 @@ /* Written 1998-2001 by Donald Becker. - Current Maintainer: Roger Luethi <rl@hellgate.ch> + Current Maintainer: Kevin Brace <kevinbrace@bracecomputerlab.com> This software may be used and distributed according to the terms of the GNU General Public License (GPL), incorporated herein by reference. @@ -32,8 +32,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define DRV_NAME "via-rhine" -#define DRV_VERSION "1.5.1" -#define DRV_RELDATE "2010-10-09" #include <linux/types.h> @@ -117,10 +115,6 @@ static const int multicast_filter_limit = 32; #include <linux/uaccess.h> #include <linux/dmi.h> -/* These identify the driver base version and may not be removed. */ -static const char version[] = - "v1.10-LK" DRV_VERSION " " DRV_RELDATE " Written by Donald Becker"; - MODULE_AUTHOR("Donald Becker <becker@scyld.com>"); MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver"); MODULE_LICENSE("GPL"); @@ -243,7 +237,7 @@ enum rhine_revs { VT8233 = 0x60, /* Integrated MAC */ VT8235 = 0x74, /* Integrated MAC */ VT8237 = 0x78, /* Integrated MAC */ - VTunknown1 = 0x7C, + VT8251 = 0x7C, /* Integrated MAC */ VT6105 = 0x80, VT6105_B0 = 0x83, VT6105L = 0x8A, @@ -1051,11 +1045,6 @@ static int rhine_init_one_pci(struct pci_dev *pdev, u32 quirks = 0; #endif -/* when built into the kernel, we only print version if device is found */ -#ifndef MODULE - pr_info_once("%s\n", version); -#endif - rc = pci_enable_device(pdev); if (rc) goto err_out; @@ -1706,6 +1695,8 @@ static int rhine_open(struct net_device *dev) goto out_free_ring; alloc_tbufs(dev); + enable_mmio(rp->pioaddr, rp->quirks); + rhine_power_init(dev); rhine_chip_reset(dev); rhine_task_enable(rp); init_registers(dev); @@ -2294,7 +2285,6 @@ static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *i struct device *hwdev = dev->dev.parent; strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info)); } @@ -2616,9 +2606,6 @@ static int __init rhine_init(void) int ret_pci, ret_platform; /* when a module, this is printed whether or not devices are found in probe */ -#ifdef MODULE - pr_info("%s\n", version); -#endif if (dmi_check_system(rhine_dmi_table)) { /* these BIOSes fail at PXE boot if chip is in D3 */ avoid_D3 = true; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9159846b8b93..787ac2c8e74e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1077,6 +1077,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rx_sa *rx_sa; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + unsigned int len; sci_t sci; u32 hdr_pn; bool cbit; @@ -1232,9 +1233,10 @@ deliver: macsec_rxsc_put(rx_sc); skb_orphan(skb); + len = skb->len; ret = gro_cells_receive(&macsec->gro_cells, skb); if (ret == NET_RX_SUCCESS) - count_rx(dev, skb->len); + count_rx(dev, len); else macsec->secy.netdev->stats.rx_dropped++; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 726e4b240e7e..1c5a10b672fc 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -222,6 +222,7 @@ config MDIO_THUNDER depends on 64BIT depends on PCI select MDIO_CAVIUM + select MDIO_DEVRES help This driver supports the MDIO interfaces found on Cavium ThunderX SoCs when the MDIO bus device appears as a PCI diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 95dbe5e8e1d8..0f0960971800 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/net/phy/realtek.c +/* drivers/net/phy/realtek.c * * Driver for Realtek PHYs * @@ -32,9 +31,9 @@ #define RTL8211F_TX_DELAY BIT(8) #define RTL8211F_RX_DELAY BIT(3) -#define RTL8211E_TX_DELAY BIT(1) -#define RTL8211E_RX_DELAY BIT(2) -#define RTL8211E_MODE_MII_GMII BIT(3) +#define RTL8211E_CTRL_DELAY BIT(13) +#define RTL8211E_TX_DELAY BIT(12) +#define RTL8211E_RX_DELAY BIT(11) #define RTL8201F_ISR 0x1e #define RTL8201F_IER 0x13 @@ -246,16 +245,16 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */ switch (phydev->interface) { case PHY_INTERFACE_MODE_RGMII: - val = 0; + val = RTL8211E_CTRL_DELAY | 0; break; case PHY_INTERFACE_MODE_RGMII_ID: - val = RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_RXID: - val = RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_TXID: - val = RTL8211E_TX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY; break; default: /* the rest of the modes imply leaving delays as is. */ return 0; @@ -263,11 +262,12 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* According to a sample driver there is a 0x1c config register on the * 0xa4 extension page (0x7) layout. It can be used to disable/enable - * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. It can - * also be used to customize the whole configuration register: - * 8:6 = PHY Address, 5:4 = Auto-Negotiation, 3 = Interface Mode Select, - * 2 = RX Delay, 1 = TX Delay, 0 = SELRGV (see original PHY datasheet - * for details). + * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. + * The configuration register definition: + * 14 = reserved + * 13 = Force Tx RX Delay controlled by bit12 bit11, + * 12 = RX Delay, 11 = TX Delay + * 10:0 = Test && debug settings reserved by realtek */ oldpage = phy_select_page(phydev, 0x7); if (oldpage < 0) @@ -277,7 +277,8 @@ static int rtl8211e_config_init(struct phy_device *phydev) if (ret) goto err_restore_page; - ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, + ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY + | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, val); err_restore_page: diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 8c1e02752ff6..bcc4a4c011f1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -287,7 +287,7 @@ inst_rollback: for (i--; i >= 0; i--) __team_option_inst_del_option(team, dst_opts[i]); - i = option_count - 1; + i = option_count; alloc_rollback: for (i--; i >= 0; i--) kfree(dst_opts[i]); @@ -2112,6 +2112,7 @@ static void team_setup_by_port(struct net_device *dev, dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; + dev->needed_headroom = port_dev->needed_headroom; dev->addr_len = port_dev->addr_len; dev->mtu = port_dev->mtu; memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index a38e868e44d4..5541f3faedbc 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1823,6 +1823,33 @@ static const struct driver_info belkin_info = { .status = ax88179_status, .link_reset = ax88179_link_reset, .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + +static const struct driver_info toshiba_info = { + .description = "Toshiba USB Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + +static const struct driver_info mct_info = { + .description = "MCT USB 3.0 Gigabit Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, @@ -1861,6 +1888,14 @@ static const struct usb_device_id products[] = { /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */ USB_DEVICE(0x050d, 0x0128), .driver_info = (unsigned long)&belkin_info, +}, { + /* Toshiba USB 3.0 GBit Ethernet Adapter */ + USB_DEVICE(0x0930, 0x0a13), + .driver_info = (unsigned long)&toshiba_info, +}, { + /* Magic Control Technology U3-A9003 USB 3.0 Gigabit Ethernet Adapter */ + USB_DEVICE(0x0711, 0x0179), + .driver_info = (unsigned long)&mct_info, }, { }, }; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index e92cb51a2c77..060a8a03e6c4 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -360,28 +360,47 @@ fail: } #endif /* PEGASUS_WRITE_EEPROM */ -static inline void get_node_id(pegasus_t *pegasus, __u8 *id) +static inline int get_node_id(pegasus_t *pegasus, u8 *id) { - int i; - __u16 w16; + int i, ret; + u16 w16; for (i = 0; i < 3; i++) { - read_eprom_word(pegasus, i, &w16); + ret = read_eprom_word(pegasus, i, &w16); + if (ret < 0) + return ret; ((__le16 *) id)[i] = cpu_to_le16(w16); } + + return 0; } static void set_ethernet_addr(pegasus_t *pegasus) { - __u8 node_id[6]; + int ret; + u8 node_id[6]; if (pegasus->features & PEGASUS_II) { - get_registers(pegasus, 0x10, sizeof(node_id), node_id); + ret = get_registers(pegasus, 0x10, sizeof(node_id), node_id); + if (ret < 0) + goto err; } else { - get_node_id(pegasus, node_id); - set_registers(pegasus, EthID, sizeof(node_id), node_id); + ret = get_node_id(pegasus, node_id); + if (ret < 0) + goto err; + ret = set_registers(pegasus, EthID, sizeof(node_id), node_id); + if (ret < 0) + goto err; } + memcpy(pegasus->net->dev_addr, node_id, sizeof(node_id)); + + return; +err: + eth_hw_addr_random(pegasus->net); + dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + + return; } static inline int reset_mac(pegasus_t *pegasus) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 07c42c0719f5..5ca1356b8656 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1375,6 +1375,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ + {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 733f120c852b..9d079dc2a535 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -274,12 +274,20 @@ static int write_mii_word(rtl8150_t * dev, u8 phy, __u8 indx, u16 reg) return 1; } -static inline void set_ethernet_addr(rtl8150_t * dev) +static void set_ethernet_addr(rtl8150_t *dev) { - u8 node_id[6]; + u8 node_id[ETH_ALEN]; + int ret; + + ret = get_registers(dev, IDR, sizeof(node_id), node_id); - get_registers(dev, IDR, sizeof(node_id), node_id); - memcpy(dev->netdev->dev_addr, node_id, sizeof(node_id)); + if (ret == sizeof(node_id)) { + ether_addr_copy(dev->netdev->dev_addr, node_id); + } else { + eth_hw_addr_random(dev->netdev); + netdev_notice(dev->netdev, "Assigned a random MAC address: %pM\n", + dev->netdev->dev_addr); + } } static int rtl8150_set_mac_address(struct net_device *netdev, void *p) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 263b005981bd..668685c09e65 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; +#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; @@ -2531,7 +2536,8 @@ static int virtnet_set_features(struct net_device *dev, if (features & NETIF_F_LRO) offloads = vi->guest_offloads_capable; else - offloads = 0; + offloads = vi->guest_offloads_capable & + ~GUEST_OFFLOAD_LRO_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2818015324b8..336504b7531d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1032,7 +1032,6 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, /* Use temporary descriptor to avoid touching bits multiple times */ union Vmxnet3_GenericDesc tempTxDesc; #endif - struct udphdr *udph; count = txd_estimate(skb); @@ -1135,8 +1134,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.om = VMXNET3_OM_ENCAP; gdesc->txd.msscof = ctx.mss; - udph = udp_hdr(skb); - if (udph->check) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) gdesc->txd.oco = 1; } else { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; @@ -3371,6 +3369,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, .ndo_change_mtu = vmxnet3_change_mtu, .ndo_fix_features = vmxnet3_fix_features, .ndo_set_features = vmxnet3_set_features, + .ndo_features_check = vmxnet3_features_check, .ndo_get_stats64 = vmxnet3_get_stats64, .ndo_tx_timeout = vmxnet3_tx_timeout, .ndo_set_rx_mode = vmxnet3_set_mc, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 1014693a5ceb..7ec8652f2c26 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -267,6 +267,34 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev, return features; } +netdev_features_t vmxnet3_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + /* Validate if the tunneled packet is being offloaded by the device */ + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) { + u8 l4_proto = 0; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + + if (l4_proto != IPPROTO_UDP) + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + return features; +} + static void vmxnet3_enable_encap_offloads(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5d2b062215a2..d958b92c9429 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -470,6 +470,10 @@ vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter); netdev_features_t vmxnet3_fix_features(struct net_device *netdev, netdev_features_t features); +netdev_features_t +vmxnet3_features_check(struct sk_buff *skb, + struct net_device *netdev, netdev_features_t features); + int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features); diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 7ee980575208..c418767a890a 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -464,7 +464,6 @@ static int x25_asy_open(struct net_device *dev) { struct x25_asy *sl = netdev_priv(dev); unsigned long len; - int err; if (sl->tty == NULL) return -ENODEV; @@ -490,14 +489,7 @@ static int x25_asy_open(struct net_device *dev) sl->xleft = 0; sl->flags &= (1 << SLF_INUSE); /* Clear ESCAPE & ERROR flags */ - netif_start_queue(dev); - - /* - * Now attach LAPB - */ - err = lapb_register(dev, &x25_asy_callbacks); - if (err == LAPB_OK) - return 0; + return 0; /* Cleanup */ kfree(sl->xbuff); @@ -519,7 +511,6 @@ static int x25_asy_close(struct net_device *dev) if (sl->tty) clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; spin_unlock(&sl->lock); @@ -604,7 +595,6 @@ static int x25_asy_open_tty(struct tty_struct *tty) static void x25_asy_close_tty(struct tty_struct *tty) { struct x25_asy *sl = tty->disc_data; - int err; /* First make sure we're connected. */ if (!sl || sl->magic != X25_ASY_MAGIC) @@ -615,11 +605,6 @@ static void x25_asy_close_tty(struct tty_struct *tty) dev_close(sl->dev); rtnl_unlock(); - err = lapb_unregister(sl->dev); - if (err != LAPB_OK) - pr_err("%s: lapb_unregister error: %d\n", - __func__, err); - tty->disc_data = NULL; sl->tty = NULL; x25_asy_free(sl); @@ -722,15 +707,39 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, static int x25_asy_open_dev(struct net_device *dev) { + int err; struct x25_asy *sl = netdev_priv(dev); if (sl->tty == NULL) return -ENODEV; + + err = lapb_register(dev, &x25_asy_callbacks); + if (err != LAPB_OK) + return -ENOMEM; + + netif_start_queue(dev); + + return 0; +} + +static int x25_asy_close_dev(struct net_device *dev) +{ + int err; + + netif_stop_queue(dev); + + err = lapb_unregister(dev); + if (err != LAPB_OK) + pr_err("%s: lapb_unregister error: %d\n", + __func__, err); + + x25_asy_close(dev); + return 0; } static const struct net_device_ops x25_asy_netdev_ops = { .ndo_open = x25_asy_open_dev, - .ndo_stop = x25_asy_close, + .ndo_stop = x25_asy_close_dev, .ndo_start_xmit = x25_asy_xmit, .ndo_tx_timeout = x25_asy_timeout, .ndo_change_mtu = x25_asy_change_mtu, diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c index fc1ebabfebac..1f57b43693bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c @@ -460,7 +460,7 @@ void mt7615_init_device(struct mt7615_dev *dev) dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; dev->mphy.sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; dev->mphy.sband_5g.sband.vht_cap.cap |= - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; mt7615_cap_dbdc_disable(dev); dev->phy.dfs_state = -1; diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 6aafff9d4231..e013ebe3079c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -671,9 +671,10 @@ bool qtnf_netdev_is_qtn(const struct net_device *ndev) return ndev->netdev_ops == &qtnf_netdev_ops; } -static int qtnf_check_br_ports(struct net_device *dev, void *data) +static int qtnf_check_br_ports(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct net_device *ndev = data; + struct net_device *ndev = (struct net_device *)priv->data; if (dev != ndev && netdev_port_same_parent_id(dev, ndev)) return -ENOTSUPP; @@ -686,6 +687,9 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb, { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); const struct netdev_notifier_changeupper_info *info; + struct netdev_nested_priv priv = { + .data = (void *)ndev, + }; struct net_device *brdev; struct qtnf_vif *vif; struct qtnf_bus *bus; @@ -725,7 +729,7 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb, } else { ret = netdev_walk_all_lower_dev(brdev, qtnf_check_br_ports, - ndev); + &priv); } break; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8f9d61e0729f..893e29624c16 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3265,8 +3265,10 @@ static int nvme_dev_open(struct inode *inode, struct file *file) } nvme_get_ctrl(ctrl); - if (!try_module_get(ctrl->ops->module)) + if (!try_module_get(ctrl->ops->module)) { + nvme_put_ctrl(ctrl); return -EINVAL; + } file->private_data = ctrl; return 0; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8f4f29f18b8c..d6a3e1487354 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -913,12 +913,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) else flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; - /* can't zcopy slab pages */ - if (unlikely(PageSlab(page))) { - ret = sock_no_sendpage(queue->sock, page, offset, len, + if (sendpage_ok(page)) { + ret = kernel_sendpage(queue->sock, page, offset, len, flags); } else { - ret = kernel_sendpage(queue->sock, page, offset, len, + ret = sock_no_sendpage(queue->sock, page, offset, len, flags); } if (ret <= 0) diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index 190e4a6186ef..f64b82824db2 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -439,7 +439,9 @@ static int olpc_ec_probe(struct platform_device *pdev) &config); if (IS_ERR(ec->dcon_rdev)) { dev_err(&pdev->dev, "failed to register DCON regulator\n"); - return PTR_ERR(ec->dcon_rdev); + err = PTR_ERR(ec->dcon_rdev); + kfree(ec); + return err; } ec->dbgfs_dir = olpc_ec_setup_debugfs(); diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 40219bba6801..0d91d136bc3b 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -469,6 +469,7 @@ config FUJITSU_LAPTOP depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO || ACPI_VIDEO = n select INPUT_SPARSEKMAP + select NEW_LEDS select LEDS_CLASS help This is a driver for laptops built by Fujitsu: @@ -1112,6 +1113,7 @@ config LG_LAPTOP depends on ACPI_WMI depends on INPUT select INPUT_SPARSEKMAP + select NEW_LEDS select LEDS_CLASS help This driver adds support for hotkeys as well as control of keyboard diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index b2e3d1e3b3e9..1d9fbabd02fb 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -115,6 +115,10 @@ static struct quirk_entry quirk_asus_vendor_backlight = { .wmi_backlight_set_devstate = true, }; +static struct quirk_entry quirk_asus_use_kbd_dock_devid = { + .use_kbd_dock_devid = true, +}; + static int dmi_matched(const struct dmi_system_id *dmi) { pr_info("Identified laptop model '%s'\n", dmi->ident); @@ -488,6 +492,34 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_vendor_backlight, }, + { + .callback = dmi_matched, + .ident = "Asus Transformer T100TA / T100HA / T100CHI", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + /* Match *T100* */ + DMI_MATCH(DMI_PRODUCT_NAME, "T100"), + }, + .driver_data = &quirk_asus_use_kbd_dock_devid, + }, + { + .callback = dmi_matched, + .ident = "Asus Transformer T101HA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "T101HA"), + }, + .driver_data = &quirk_asus_use_kbd_dock_devid, + }, + { + .callback = dmi_matched, + .ident = "Asus Transformer T200TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "T200TA"), + }, + .driver_data = &quirk_asus_use_kbd_dock_devid, + }, {}, }; @@ -593,33 +625,9 @@ static struct asus_wmi_driver asus_nb_wmi_driver = { .detect_quirks = asus_nb_wmi_quirks, }; -static const struct dmi_system_id asus_nb_wmi_blacklist[] __initconst = { - { - /* - * asus-nb-wm adds no functionality. The T100TA has a detachable - * USB kbd, so no hotkeys and it has no WMI rfkill; and loading - * asus-nb-wm causes the camera LED to turn and _stay_ on. - */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), - }, - }, - { - /* The Asus T200TA has the same issue as the T100TA */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T200TA"), - }, - }, - {} /* Terminating entry */ -}; static int __init asus_nb_wmi_init(void) { - if (dmi_check_system(asus_nb_wmi_blacklist)) - return -ENODEV; - return asus_wmi_register_driver(&asus_nb_wmi_driver); } diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 8f4acdc06b13..39e1a6396e08 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -365,12 +365,14 @@ static int asus_wmi_input_init(struct asus_wmi *asus) if (err) goto err_free_dev; - result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_KBD_DOCK); - if (result >= 0) { - input_set_capability(asus->inputdev, EV_SW, SW_TABLET_MODE); - input_report_switch(asus->inputdev, SW_TABLET_MODE, !result); - } else if (result != -ENODEV) { - pr_err("Error checking for keyboard-dock: %d\n", result); + if (asus->driver->quirks->use_kbd_dock_devid) { + result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_KBD_DOCK); + if (result >= 0) { + input_set_capability(asus->inputdev, EV_SW, SW_TABLET_MODE); + input_report_switch(asus->inputdev, SW_TABLET_MODE, !result); + } else if (result != -ENODEV) { + pr_err("Error checking for keyboard-dock: %d\n", result); + } } err = input_register_device(asus->inputdev); @@ -442,6 +444,7 @@ static int asus_wmi_battery_add(struct power_supply *battery) */ if (strcmp(battery->desc->name, "BAT0") != 0 && strcmp(battery->desc->name, "BAT1") != 0 && + strcmp(battery->desc->name, "BATC") != 0 && strcmp(battery->desc->name, "BATT") != 0) return -ENODEV; @@ -2114,7 +2117,7 @@ static void asus_wmi_handle_event_code(int code, struct asus_wmi *asus) return; } - if (code == NOTIFY_KBD_DOCK_CHANGE) { + if (asus->driver->quirks->use_kbd_dock_devid && code == NOTIFY_KBD_DOCK_CHANGE) { result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_KBD_DOCK); if (result >= 0) { diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 4f31b68642a0..1a95c172f94b 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -33,6 +33,7 @@ struct quirk_entry { bool wmi_backlight_native; bool wmi_backlight_set_devstate; bool wmi_force_als_set; + bool use_kbd_dock_devid; int wapf; /* * For machines with AMD graphic chips, it will send out WMI event diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index e85d8e58320c..f5901b0b07cd 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -167,20 +167,54 @@ static bool intel_vbtn_has_buttons(acpi_handle handle) return ACPI_SUCCESS(status); } +/* + * There are several laptops (non 2-in-1) models out there which support VGBS, + * but simply always return 0, which we translate to SW_TABLET_MODE=1. This in + * turn causes userspace (libinput) to suppress events from the builtin + * keyboard and touchpad, making the laptop essentially unusable. + * + * Since the problem of wrongly reporting SW_TABLET_MODE=1 in combination + * with libinput, leads to a non-usable system. Where as OTOH many people will + * not even notice when SW_TABLET_MODE is not being reported, a DMI based allow + * list is used here. This list mainly matches on the chassis-type of 2-in-1s. + * + * There are also some 2-in-1s which use the intel-vbtn ACPI interface to report + * SW_TABLET_MODE with a chassis-type of 8 ("Portable") or 10 ("Notebook"), + * these are matched on a per model basis, since many normal laptops with a + * possible broken VGBS ACPI-method also use these chassis-types. + */ +static const struct dmi_system_id dmi_switches_allow_list[] = { + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */), + }, + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"), + }, + }, + {} /* Array terminator */ +}; + static bool intel_vbtn_has_switches(acpi_handle handle) { - const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); unsigned long long vgbs; acpi_status status; - /* - * Some normal laptops have a VGBS method despite being non-convertible - * and their VGBS method always returns 0, causing detect_tablet_mode() - * to report SW_TABLET_MODE=1 to userspace, which causes issues. - * These laptops have a DMI chassis_type of 9 ("Laptop"), do not report - * switches on any devices with a DMI chassis_type of 9. - */ - if (chassis_type && strcmp(chassis_type, "9") == 0) + if (!dmi_check_system(dmi_switches_allow_list)) return false; status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs); diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c index 731281855cc8..73797680b895 100644 --- a/drivers/platform/x86/intel_pmc_core_pltdrv.c +++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c @@ -20,15 +20,10 @@ static void intel_pmc_core_release(struct device *dev) { - /* Nothing to do. */ + kfree(dev); } -static struct platform_device pmc_core_device = { - .name = "intel_pmc_core", - .dev = { - .release = intel_pmc_core_release, - }, -}; +static struct platform_device *pmc_core_device; /* * intel_pmc_core_platform_ids is the list of platforms where we want to @@ -52,6 +47,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids); static int __init pmc_core_platform_init(void) { + int retval; + /* Skip creating the platform device if ACPI already has a device */ if (acpi_dev_present("INT33A1", NULL, -1)) return -ENODEV; @@ -59,12 +56,23 @@ static int __init pmc_core_platform_init(void) if (!x86_match_cpu(intel_pmc_core_platform_ids)) return -ENODEV; - return platform_device_register(&pmc_core_device); + pmc_core_device = kzalloc(sizeof(*pmc_core_device), GFP_KERNEL); + if (!pmc_core_device) + return -ENOMEM; + + pmc_core_device->name = "intel_pmc_core"; + pmc_core_device->dev.release = intel_pmc_core_release; + + retval = platform_device_register(pmc_core_device); + if (retval) + kfree(pmc_core_device); + + return retval; } static void __exit pmc_core_platform_exit(void) { - platform_device_unregister(&pmc_core_device); + platform_device_unregister(pmc_core_device); } module_init(pmc_core_platform_init); diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 8cf8c1be2666..1506ec0a4777 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -171,7 +171,6 @@ #define MLXPLAT_CPLD_NR_NONE -1 #define MLXPLAT_CPLD_PSU_DEFAULT_NR 10 #define MLXPLAT_CPLD_PSU_MSNXXXX_NR 4 -#define MLXPLAT_CPLD_PSU_MSNXXXX_NR2 3 #define MLXPLAT_CPLD_FAN1_DEFAULT_NR 11 #define MLXPLAT_CPLD_FAN2_DEFAULT_NR 12 #define MLXPLAT_CPLD_FAN3_DEFAULT_NR 13 @@ -347,6 +346,15 @@ static struct i2c_board_info mlxplat_mlxcpld_pwr[] = { }, }; +static struct i2c_board_info mlxplat_mlxcpld_ext_pwr[] = { + { + I2C_BOARD_INFO("dps460", 0x5b), + }, + { + I2C_BOARD_INFO("dps460", 0x5a), + }, +}; + static struct i2c_board_info mlxplat_mlxcpld_fan[] = { { I2C_BOARD_INFO("24c32", 0x50), @@ -921,15 +929,15 @@ static struct mlxreg_core_data mlxplat_mlxcpld_ext_pwr_items_data[] = { .label = "pwr3", .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = BIT(2), - .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[0], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2, + .hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[0], + .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, }, { .label = "pwr4", .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = BIT(3), - .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[1], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2, + .hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[1], + .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, }, }; diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index 6aff6cf41414..c37349f97bb8 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -32,7 +32,7 @@ #define APU2_GPIO_REG_LED3 AMD_FCH_GPIO_REG_GPIO59_DEVSLP1 #define APU2_GPIO_REG_MODESW AMD_FCH_GPIO_REG_GPIO32_GE1 #define APU2_GPIO_REG_SIMSWAP AMD_FCH_GPIO_REG_GPIO33_GE2 -#define APU2_GPIO_REG_MPCIE2 AMD_FCH_GPIO_REG_GPIO59_DEVSLP0 +#define APU2_GPIO_REG_MPCIE2 AMD_FCH_GPIO_REG_GPIO55_DEVSLP0 #define APU2_GPIO_REG_MPCIE3 AMD_FCH_GPIO_REG_GPIO51 /* Order in which the GPIO lines are defined in the register list */ diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9c4df41687a3..eae3579f106f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2569,7 +2569,7 @@ static void hotkey_compare_and_issue_event(struct tp_nvram_state *oldn, */ static int hotkey_kthread(void *data) { - struct tp_nvram_state s[2]; + struct tp_nvram_state s[2] = { 0 }; u32 poll_mask, event_mask; unsigned int si, so; unsigned long t; @@ -6829,8 +6829,10 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) list_for_each_entry(child, &device->children, node) { acpi_status status = acpi_evaluate_object(child->handle, "_BCL", NULL, &buffer); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + buffer.length = ACPI_ALLOCATE_BUFFER; continue; + } obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) { diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 5c223015ee71..dda60f89c951 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -373,6 +373,23 @@ static const struct ts_dmi_data jumper_ezpad_mini3_data = { .properties = jumper_ezpad_mini3_props, }; +static const struct property_entry mpman_converter9_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 8), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1664), + PROPERTY_ENTRY_U32("touchscreen-size-y", 880), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-mpman-converter9.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data mpman_converter9_data = { + .acpi_name = "MSSL1680:00", + .properties = mpman_converter9_props, +}; + static const struct property_entry mpman_mpwin895cl_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 3), PROPERTY_ENTRY_U32("touchscreen-min-y", 9), @@ -977,6 +994,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* MP Man Converter 9 */ + .driver_data = (void *)&mpman_converter9_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPMAN"), + DMI_MATCH(DMI_PRODUCT_NAME, "Converter9"), + }, + }, + { /* MP Man MPWIN895CL */ .driver_data = (void *)&mpman_mpwin895cl_data, .matches = { diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 37e5d4e48c2f..83f14b2c8804 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -128,7 +128,7 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv) * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg))) + if (!recv && sendpage_ok(sg_page(sg))) return; if (recv) { diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 4271c408103e..d7d32b656102 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -30,9 +30,7 @@ config IFCVF be called ifcvf. config MLX5_VDPA - bool "MLX5 VDPA support library for ConnectX devices" - depends on MLX5_CORE - default n + bool help Support library for Mellanox VDPA drivers. Provides code that is common for all types of VDPA drivers. The following drivers are planned: @@ -40,7 +38,8 @@ config MLX5_VDPA config MLX5_VDPA_NET tristate "vDPA driver for ConnectX devices" - depends on MLX5_VDPA + select MLX5_VDPA + depends on MLX5_CORE default n help VDPA network driver for ConnectX6 and newer. Provides offloading diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 70676a6d1691..74264e590695 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1133,15 +1133,17 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m if (!mvq->initialized) return; - if (query_virtqueue(ndev, mvq, &attr)) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); - return; - } if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) return; if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); + + if (query_virtqueue(ndev, mvq, &attr)) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); + return; + } + mvq->avail_idx = attr.available_index; } static void suspend_vqs(struct mlx5_vdpa_net *ndev) @@ -1411,8 +1413,14 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa struct mlx5_virtq_attr attr; int err; - if (!mvq->initialized) - return -EAGAIN; + /* If the virtq object was destroyed, use the value saved at + * the last minute of suspend_vq. This caters for userspace + * that cares about emulating the index after vq is stopped. + */ + if (!mvq->initialized) { + state->avail_index = mvq->avail_idx; + return 0; + } err = query_virtqueue(ndev, mvq, &attr); if (err) { diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 796fe979f997..62a9bb0efc55 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -565,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, perm_to_iommu_flags(perm)); } + if (r) + vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + return r; } @@ -592,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - unsigned long list_size = PAGE_SIZE / sizeof(struct page *); + struct vm_area_struct **vmas; unsigned int gup_flags = FOLL_LONGTERM; - unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long map_pfn, last_pfn = 0; + unsigned long npages, lock_limit; + unsigned long i, nmap = 0; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) - return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -614,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; + page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!page_list || !vmas) { + ret = -ENOMEM; + goto free; + } + mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; - iova &= PAGE_MASK; + pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, + page_list, vmas); + if (npages != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } + goto unlock; + } - while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) - goto out; - - if (!last_pfn) - map_pfn = page_to_pfn(page_list[0]); - - for (i = 0; i < ret; i++) { - unsigned long this_pfn = page_to_pfn(page_list[i]); - u64 csize; - - if (last_pfn && (this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) - goto out; - map_pfn = this_pfn; - iova += csize; + iova &= PAGE_MASK; + map_pfn = page_to_pfn(page_list[0]); + + /* One more iteration to avoid extra vdpa_map() call out of loop. */ + for (i = 0; i <= npages; i++) { + unsigned long this_pfn; + u64 csize; + + /* The last chunk may have no valid PFN next to it */ + this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; + + if (last_pfn && (this_pfn == -1UL || + this_pfn != last_pfn + 1)) { + /* Pin a contiguous chunk of memory */ + csize = last_pfn - map_pfn + 1; + ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) { + /* + * Unpin the rest chunks of memory on the + * flight with no corresponding vdpa_map() + * calls having been made yet. On the other + * hand, vdpa_unmap() in the failure path + * is in charge of accounting the number of + * pinned pages for its own. + * This asymmetrical pattern of accounting + * is for efficiency to pin all pages at + * once, while there is no other callsite + * of vdpa_map() than here above. + */ + unpin_user_pages(&page_list[nmap], + npages - nmap); + goto out; } - - last_pfn = this_pfn; + atomic64_add(csize, &dev->mm->pinned_vm); + nmap += csize; + iova += csize << PAGE_SHIFT; + map_pfn = this_pfn; } - - cur_base += ret << PAGE_SHIFT; - npages -= ret; + last_pfn = this_pfn; } - /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + WARN_ON(nmap != npages); out: - if (ret) { + if (ret) vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); - } +unlock: mmap_read_unlock(dev->mm); - free_page((unsigned long)page_list); +free: + kvfree(vmas); + kvfree(page_list); return ret; } @@ -810,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) err_init_iotlb: vhost_dev_cleanup(&v->vdev); + kfree(vqs); err: atomic_dec(&v->opened); return r; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b45519ca66a7..9ad45e1d27f0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, vring_used_t __user *used) { + /* If an IOTLB device is present, the vring addresses are + * GIOVAs. Access validation occurs at prefetch time. */ + if (vq->iotlb) + return true; + return access_ok(desc, vhost_get_desc_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(used, vhost_get_used_size(vq, num)); @@ -1365,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_log_access_ok); +static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, + void __user *log_base, + bool log_used, + u64 log_addr) +{ + /* If an IOTLB device is present, log_addr is a GIOVA that + * will never be logged by log_used(). */ + if (vq->iotlb) + return true; + + return !log_used || log_access_ok(log_base, log_addr, + vhost_get_used_size(vq, vq->num)); +} + /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ static bool vq_log_access_ok(struct vhost_virtqueue *vq, @@ -1372,8 +1391,7 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq, { return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && - (!vq->log_used || log_access_ok(log_base, vq->log_addr, - vhost_get_used_size(vq, vq->num))); + vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); } /* Can we start vq? */ @@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) if (!vq_log_access_ok(vq, vq->log_base)) return false; - /* Access validation occurs at prefetch time with IOTLB */ - if (vq->iotlb) - return true; - return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); @@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d, return -EINVAL; /* Also validate log access for used ring if enabled. */ - if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && - !log_access_ok(vq->log_base, a.log_guest_addr, - sizeof *vq->used + - vq->num * sizeof *vq->used->ring)) + if (!vq_log_used_access_ok(vq, vq->log_base, + a.flags & (0x1 << VHOST_VRING_F_LOG), + a.log_guest_addr)) return -EINVAL; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1d13d2e882ad..0fe8844b4bee 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -810,14 +810,32 @@ void afs_evict_inode(struct inode *inode) static void afs_setattr_success(struct afs_operation *op) { - struct inode *inode = &op->file[0].vnode->vfs_inode; + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->vfs_inode; + loff_t old_i_size = i_size_read(inode); + + op->setattr.old_i_size = old_i_size; + afs_vnode_commit_status(op, vp); + /* inode->i_size has now been changed. */ + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + if (size > old_i_size) + pagecache_isize_extended(inode, old_i_size, size); + } +} + +static void afs_setattr_edit_file(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->vfs_inode; - afs_vnode_commit_status(op, &op->file[0]); if (op->setattr.attr->ia_valid & ATTR_SIZE) { - loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size; - if (size > i_size) - pagecache_isize_extended(inode, i_size, size); - truncate_pagecache(inode, size); + loff_t size = op->setattr.attr->ia_size; + loff_t i_size = op->setattr.old_i_size; + + if (size < i_size) + truncate_pagecache(inode, size); } } @@ -825,6 +843,7 @@ static const struct afs_operation_ops afs_setattr_operation = { .issue_afs_rpc = afs_fs_setattr, .issue_yfs_rpc = yfs_fs_setattr, .success = afs_setattr_success, + .edit_dir = afs_setattr_edit_file, }; /* @@ -863,11 +882,16 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); + /* Prevent any new writebacks from starting whilst we do this. */ + down_write(&vnode->validate_lock); + op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ? afs_file_key(attr->ia_file) : NULL), vnode->volume); - if (IS_ERR(op)) - return PTR_ERR(op); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto out_unlock; + } afs_op_set_vnode(op, 0, vnode); op->setattr.attr = attr; @@ -880,5 +904,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) op->file[0].update_ctime = 1; op->ops = &afs_setattr_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + +out_unlock: + up_write(&vnode->validate_lock); + _leave(" = %d", ret); + return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 18042b7dab6a..e5f0446f27e5 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -812,6 +812,7 @@ struct afs_operation { } store; struct { struct iattr *attr; + loff_t old_i_size; } setattr; struct afs_acl *acl; struct yfs_acl *yacl; diff --git a/fs/afs/write.c b/fs/afs/write.c index 4b2265cb1891..da12abd6db21 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -738,11 +738,21 @@ static int afs_writepages_region(struct address_space *mapping, int afs_writepages(struct address_space *mapping, struct writeback_control *wbc) { + struct afs_vnode *vnode = AFS_FS_I(mapping->host); pgoff_t start, end, next; int ret; _enter(""); + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. + */ + if (wbc->sync_mode == WB_SYNC_ALL) + down_read(&vnode->validate_lock); + else if (!down_read_trylock(&vnode->validate_lock)) + return 0; + if (wbc->range_cyclic) { start = mapping->writeback_index; end = -1; @@ -762,6 +772,7 @@ int afs_writepages(struct address_space *mapping, ret = afs_writepages_region(mapping, wbc, start, end, &next); } + up_read(&vnode->validate_lock); _leave(" = %d", ret); return ret; } diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c index 03d0824fc368..5a2f119b7e8c 100644 --- a/fs/exfat/cache.c +++ b/fs/exfat/cache.c @@ -17,7 +17,6 @@ #include "exfat_raw.h" #include "exfat_fs.h" -#define EXFAT_CACHE_VALID 0 #define EXFAT_MAX_CACHE 16 struct exfat_cache { @@ -61,16 +60,6 @@ void exfat_cache_shutdown(void) kmem_cache_destroy(exfat_cachep); } -void exfat_cache_init_inode(struct inode *inode) -{ - struct exfat_inode_info *ei = EXFAT_I(inode); - - spin_lock_init(&ei->cache_lru_lock); - ei->nr_caches = 0; - ei->cache_valid_id = EXFAT_CACHE_VALID + 1; - INIT_LIST_HEAD(&ei->cache_lru); -} - static inline struct exfat_cache *exfat_cache_alloc(void) { return kmem_cache_alloc(exfat_cachep, GFP_NOFS); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 95d717f8620c..c013fe931d9c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -248,6 +248,8 @@ struct exfat_sb_info { struct rcu_head rcu; }; +#define EXFAT_CACHE_VALID 0 + /* * EXFAT file system inode in-memory data */ @@ -428,7 +430,6 @@ extern const struct dentry_operations exfat_utf8_dentry_ops; /* cache.c */ int exfat_cache_init(void); void exfat_cache_shutdown(void); -void exfat_cache_init_inode(struct inode *inode); void exfat_cache_inval_inode(struct inode *inode); int exfat_get_cluster(struct inode *inode, unsigned int cluster, unsigned int *fclus, unsigned int *dclus, diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 7f90204adef5..a6de17cac3df 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -611,8 +611,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) ei->i_crtime = info->crtime; inode->i_atime = info->atime; - exfat_cache_init_inode(inode); - return 0; } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index e73f20f66cb2..c94ac239f740 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -578,7 +578,8 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, i_pos = exfat_make_i_pos(&info); inode = exfat_build_inode(sb, &info, i_pos); - if (IS_ERR(inode)) + err = PTR_ERR_OR_ZERO(inode); + if (err) goto unlock; inode_inc_iversion(inode); @@ -745,10 +746,9 @@ static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry, i_pos = exfat_make_i_pos(&info); inode = exfat_build_inode(sb, &info, i_pos); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); + err = PTR_ERR_OR_ZERO(inode); + if (err) goto unlock; - } i_mode = inode->i_mode; alias = d_find_alias(inode); @@ -890,10 +890,9 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) i_pos = exfat_make_i_pos(&info); inode = exfat_build_inode(sb, &info, i_pos); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); + err = PTR_ERR_OR_ZERO(inode); + if (err) goto unlock; - } inode_inc_iversion(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 3b6a1659892f..60b941ba557b 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -376,7 +376,6 @@ static int exfat_read_root(struct inode *inode) inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = current_time(inode); exfat_truncate_atime(&inode->i_atime); - exfat_cache_init_inode(inode); return 0; } @@ -763,6 +762,10 @@ static void exfat_inode_init_once(void *foo) { struct exfat_inode_info *ei = (struct exfat_inode_info *)foo; + spin_lock_init(&ei->cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = EXFAT_CACHE_VALID + 1; + INIT_LIST_HEAD(&ei->cache_lru); INIT_HLIST_NODE(&ei->i_hash_fat); inode_init_once(&ei->vfs_inode); } diff --git a/fs/splice.c b/fs/splice.c index c3d00dfc7344..ce75aec52274 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -526,6 +526,22 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des return 1; } +/* We know we have a pipe buffer, but maybe it's empty? */ +static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) +{ + unsigned int tail = pipe->tail; + unsigned int mask = pipe->ring_size - 1; + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + + if (unlikely(!buf->len)) { + pipe_buf_release(pipe, buf); + pipe->tail = tail+1; + return true; + } + + return false; +} + /** * splice_from_pipe_next - wait for some data to splice from * @pipe: pipe to splice from @@ -545,6 +561,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des if (signal_pending(current)) return -ERESTARTSYS; +repeat: while (pipe_empty(pipe->head, pipe->tail)) { if (!pipe->writers) return 0; @@ -566,6 +583,9 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des pipe_wait_readable(pipe); } + if (eat_empty_buffer(pipe)) + goto repeat; + return 1; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5430febd34be..7636bc71c71f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -661,7 +661,7 @@ #define BTF \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ __start_BTF = .; \ - *(.BTF) \ + KEEP(*(.BTF)) \ __stop_BTF = .; \ } \ . = ALIGN(4); \ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c145de0473bc..372100c755e7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -767,6 +767,8 @@ struct mlx5_cmd_work_ent { u64 ts2; u16 op; bool polling; + /* Track the max comp handlers */ + refcount_t refcnt; }; struct mlx5_pas { @@ -933,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); diff --git a/include/linux/net.h b/include/linux/net.h index d48ff1180879..ae713c851342 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -21,6 +21,7 @@ #include <linux/rcupdate.h> #include <linux/once.h> #include <linux/fs.h> +#include <linux/mm.h> #include <linux/sockptr.h> #include <uapi/linux/net.h> @@ -286,6 +287,21 @@ do { \ #define net_get_random_once_wait(buf, nbytes) \ get_random_once_wait((buf), (nbytes)) +/* + * E.g. XFS meta- & log-data is in slab pages, or bcache meta + * data pages, or other high order pages allocated by + * __get_free_pages() without __GFP_COMP, which have a page_count + * of 0 and/or have PageSlab() set. We cannot use send_page for + * those, as that does get_page(); put_page(); and would cause + * either a VM_BUG directly, or __page_cache_release a page that + * would actually still be referenced by someone, leading to some + * obscure delayed Oops somewhere else. + */ +static inline bool sendpage_ok(struct page *page) +{ + return !PageSlab(page) && page_count(page) >= 1; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bd4fcdd0738..18dec08439f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1851,6 +1851,11 @@ enum netdev_priv_flags { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * + * @nested_level: Used as as a parameter of spin_lock_nested() of + * dev->addr_list_lock. + * @unlink_list: As netif_addr_lock() can be called recursively, + * keep a list of interfaces to be deleted. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -1955,6 +1960,7 @@ struct net_device { unsigned short type; unsigned short hard_header_len; unsigned char min_header_len; + unsigned char name_assign_type; unsigned short needed_headroom; unsigned short needed_tailroom; @@ -1965,12 +1971,12 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; spinlock_t addr_list_lock; - unsigned char name_assign_type; - bool uc_promisc; + struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; @@ -1978,8 +1984,15 @@ struct net_device { #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; +#endif unsigned int promiscuity; unsigned int allmulti; + bool uc_promisc; +#ifdef CONFIG_LOCKDEP + unsigned char nested_level; +#endif /* Protocol-specific pointers */ @@ -4260,17 +4273,23 @@ static inline void netif_tx_disable(struct net_device *dev) static inline void netif_addr_lock(struct net_device *dev) { - spin_lock(&dev->addr_list_lock); -} + unsigned char nest_level = 0; -static inline void netif_addr_lock_nested(struct net_device *dev) -{ - spin_lock_nested(&dev->addr_list_lock, dev->lower_level); +#ifdef CONFIG_LOCKDEP + nest_level = dev->nested_level; +#endif + spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_lock_bh(struct net_device *dev) { - spin_lock_bh(&dev->addr_list_lock); + unsigned char nest_level = 0; + +#ifdef CONFIG_LOCKDEP + nest_level = dev->nested_level; +#endif + local_bh_disable(); + spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_unlock(struct net_device *dev) @@ -4455,12 +4474,38 @@ extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; +enum { + NESTED_SYNC_IMM_BIT, + NESTED_SYNC_TODO_BIT, +}; + +#define __NESTED_SYNC_BIT(bit) ((u32)1 << (bit)) +#define __NESTED_SYNC(name) __NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT) + +#define NESTED_SYNC_IMM __NESTED_SYNC(IMM) +#define NESTED_SYNC_TODO __NESTED_SYNC(TODO) + +struct netdev_nested_priv { + unsigned char flags; + void *data; +}; + bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); +#ifdef CONFIG_LOCKDEP +static LIST_HEAD(net_unlink_list); + +static inline void net_unlink_todo(struct net_device *dev) +{ + if (list_empty(&dev->unlink_list)) + list_add_tail(&dev->unlink_list, &net_unlink_list); +} +#endif + /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->adj_list.upper, \ @@ -4470,8 +4515,8 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); @@ -4508,12 +4553,12 @@ struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *lower_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h index 9e46678edb2a..255d51c9d36d 100644 --- a/include/linux/platform_data/gpio/gpio-amd-fch.h +++ b/include/linux/platform_data/gpio/gpio-amd-fch.h @@ -19,7 +19,7 @@ #define AMD_FCH_GPIO_REG_GPIO49 0x40 #define AMD_FCH_GPIO_REG_GPIO50 0x41 #define AMD_FCH_GPIO_REG_GPIO51 0x42 -#define AMD_FCH_GPIO_REG_GPIO59_DEVSLP0 0x43 +#define AMD_FCH_GPIO_REG_GPIO55_DEVSLP0 0x43 #define AMD_FCH_GPIO_REG_GPIO57 0x44 #define AMD_FCH_GPIO_REG_GPIO58 0x45 #define AMD_FCH_GPIO_REG_GPIO59_DEVSLP1 0x46 diff --git a/include/net/act_api.h b/include/net/act_api.h index cb382a89ea58..87214927314a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,8 +166,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); - void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 6e5f1e1aa822..8899d7429ccb 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -138,6 +138,7 @@ genl_dumpit_info(struct netlink_callback *cb) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags + * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers diff --git a/include/net/ip.h b/include/net/ip.h index b09c48d862cc..2a52787db64a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -436,12 +436,18 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct net *net = dev_net(dst->dev); + unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); + /* 'forwarding = true' case should always honour route mtu */ + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } diff --git a/include/net/netlink.h b/include/net/netlink.h index 8e0eb2c9c528..271620f6bc7f 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1934,7 +1934,8 @@ void nla_get_range_signed(const struct nla_policy *pt, int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int maxtype, unsigned long *state); -bool netlink_policy_dump_loop(unsigned long *state); +bool netlink_policy_dump_loop(unsigned long state); int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state); +void netlink_policy_dump_free(unsigned long state); #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2737d24ec244..9e806c781025 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1773,21 +1773,17 @@ static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_es static inline int xfrm_replay_clone(struct xfrm_state *x, struct xfrm_state *orig) { - x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn), + + x->replay_esn = kmemdup(orig->replay_esn, + xfrm_replay_state_esn_len(orig->replay_esn), GFP_KERNEL); if (!x->replay_esn) return -ENOMEM; - - x->replay_esn->bmp_len = orig->replay_esn->bmp_len; - x->replay_esn->replay_window = orig->replay_esn->replay_window; - - x->preplay_esn = kmemdup(x->replay_esn, - xfrm_replay_state_esn_len(x->replay_esn), + x->preplay_esn = kmemdup(orig->preplay_esn, + xfrm_replay_state_esn_len(orig->preplay_esn), GFP_KERNEL); - if (!x->preplay_esn) { - kfree(x->replay_esn); + if (!x->preplay_esn) return -ENOMEM; - } return 0; } diff --git a/include/soc/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h index 841c6ec22b64..1669481d9779 100644 --- a/include/soc/mscc/ocelot_ana.h +++ b/include/soc/mscc/ocelot_ana.h @@ -252,10 +252,10 @@ #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M GENMASK(18, 16) #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(18, 16)) >> 16) #define ANA_SG_CONFIG_REG_3_GATE_ENABLE BIT(20) -#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 24) & GENMASK(27, 24)) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(27, 24) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(27, 24)) >> 24) -#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(28) +#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 21) & GENMASK(24, 21)) +#define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(24, 21) +#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(24, 21)) >> 21) +#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(25) #define ANA_SG_GCL_GS_CONFIG_RSZ 0x4 diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 4accfa7e266d..8f8dc7a937a4 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -51,11 +51,11 @@ enum rxrpc_cmsg_type { RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */ + RXRPC_CHARGE_ACCEPT = 14, /* s-: Charge the accept pool with a user call ID */ RXRPC__SUPPORTED }; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index cee9f8e6fce3..f84e7bcad6de 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -288,6 +288,7 @@ enum LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ + LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ __LINUX_MIB_MAX }; diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h index 39df751d0dc4..ac1b65470563 100644 --- a/include/xen/arm/page.h +++ b/include/xen/arm/page.h @@ -83,6 +83,9 @@ static inline unsigned long bfn_to_pfn(unsigned long bfn) }) #define gfn_to_virt(m) (__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT)) +#define percpu_to_gfn(v) \ + (pfn_to_gfn(per_cpu_ptr_to_phys(v) >> XEN_PAGE_SHIFT)) + /* Only used in PV code. But ARM guests are always HVM. */ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr) { diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 3b495773de5a..11b3380887fa 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -30,15 +30,15 @@ static struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { - if (!__start_BTF) + bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; + + if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) return 0; btf_kobj = kobject_create_and_add("btf", kernel_kobj); if (!btf_kobj) return -ENOMEM; - bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; - return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47e74f09fa37..fba52d9ec8fc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5667,8 +5667,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->smin_value; - u32 umin_val = src_reg->umin_value; + s32 smin_val = src_reg->s32_min_value; + u32 umin_val = src_reg->u32_min_value; /* Assuming scalar64_min_max_or will be called so it is safe * to skip updating register for known case. @@ -5691,8 +5691,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, /* ORing two positives gives a positive, so safe to * cast result into s64. */ - dst_reg->s32_min_value = dst_reg->umin_value; - dst_reg->s32_max_value = dst_reg->umax_value; + dst_reg->s32_min_value = dst_reg->u32_min_value; + dst_reg->s32_max_value = dst_reg->u32_max_value; } } diff --git a/kernel/umh.c b/kernel/umh.c index fcf3ee803630..3f646613a9d3 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -14,6 +14,7 @@ #include <linux/cred.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/fs_struct.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> @@ -72,6 +73,14 @@ static int call_usermodehelper_exec_async(void *data) spin_unlock_irq(¤t->sighand->siglock); /* + * Initial kernel threads share ther FS with init, in order to + * get the init root directory. But we've now created a new + * thread that is going to execve a user process and has its own + * 'struct fs_struct'. Reset umask to the default. + */ + current->fs->umask = 0022; + + /* * Our parent (unbound workqueue) runs with elevated scheduling * priority. Avoid propagating that into the userspace child. */ diff --git a/mm/memory.c b/mm/memory.c index fcfc4ca36eba..eeae590e526a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -806,8 +806,6 @@ copy_present_page(struct mm_struct *dst_mm, struct mm_struct *src_mm, return 1; /* - * The trick starts. - * * What we want to do is to check whether this page may * have been pinned by the parent process. If so, * instead of wrprotect the pte on both sides, we copy @@ -815,47 +813,16 @@ copy_present_page(struct mm_struct *dst_mm, struct mm_struct *src_mm, * the pinned page won't be randomly replaced in the * future. * - * To achieve this, we do the following: - * - * 1. Write-protect the pte if it's writable. This is - * to protect concurrent write fast-gup with - * FOLL_PIN, so that we'll fail the fast-gup with - * the write bit removed. - * - * 2. Check page_maybe_dma_pinned() to see whether this - * page may have been pinned. - * - * The order of these steps is important to serialize - * against the fast-gup code (gup_pte_range()) on the - * pte check and try_grab_compound_head(), so that - * we'll make sure either we'll capture that fast-gup - * so we'll copy the pinned page here, or we'll fail - * that fast-gup. - * - * NOTE! Even if we don't end up copying the page, - * we won't undo this wrprotect(), because the normal - * reference copy will need it anyway. - */ - if (pte_write(pte)) - ptep_set_wrprotect(src_mm, addr, src_pte); - - /* - * These are the "normally we can just copy by reference" - * checks. + * The page pinning checks are just "has this mm ever + * seen pinning", along with the (inexact) check of + * the page count. That might give false positives for + * for pinning, but it will work correctly. */ if (likely(!atomic_read(&src_mm->has_pinned))) return 1; if (likely(!page_maybe_dma_pinned(page))) return 1; - /* - * Uhhuh. It looks like the page might be a pinned page, - * and we actually need to copy it. Now we can set the - * source pte back to being writable. - */ - if (pte_write(pte)) - set_pte_at(src_mm, addr, src_pte, pte); - new_page = *prealloc; if (!new_page) return -EAGAIN; diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index b18cdf03edb3..dfec65eca8a6 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -88,9 +88,10 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip(struct net_device *dev, void *data) +static int br_chk_addr_ip(struct net_device *dev, + struct netdev_nested_priv *priv) { - __be32 ip = *(__be32 *)data; + __be32 ip = *(__be32 *)priv->data; struct in_device *in_dev; __be32 addr = 0; @@ -107,11 +108,15 @@ static int br_chk_addr_ip(struct net_device *dev, void *data) static bool br_is_local_ip(struct net_device *dev, __be32 ip) { - if (br_chk_addr_ip(dev, &ip)) + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; + + if (br_chk_addr_ip(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv)) return true; return false; @@ -361,9 +366,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip6(struct net_device *dev, void *data) +static int br_chk_addr_ip6(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct in6_addr *addr = (struct in6_addr *)data; + struct in6_addr *addr = (struct in6_addr *)priv->data; if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) return 1; @@ -374,11 +380,15 @@ static int br_chk_addr_ip6(struct net_device *dev, void *data) static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) { - if (br_chk_addr_ip6(dev, addr)) + struct netdev_nested_priv priv = { + .data = (void *)addr, + }; + + if (br_chk_addr_ip6(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv)) return true; return false; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9db504baa094..32ac8343b0ba 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -413,6 +413,8 @@ void br_fdb_delete_by_port(struct net_bridge *br, if (!do_all) if (test_bit(BR_FDB_STATIC, &f->flags) || + (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && + !test_bit(BR_FDB_OFFLOADED, &f->flags)) || (vid && f->key.vlan_id != vid)) continue; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 147d52596e17..da310f0ca725 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -380,6 +380,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, u32 filter_mask, const struct net_device *dev) { u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + struct nlattr *af = NULL; struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; @@ -423,11 +424,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_nest_end(skb, nest); } + if (filter_mask & (RTEXT_FILTER_BRVLAN | + RTEXT_FILTER_BRVLAN_COMPRESSED | + RTEXT_FILTER_MRP)) { + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + } + /* Check if the VID information is requested */ if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { struct net_bridge_vlan_group *vg; - struct nlattr *af; int err; /* RCU needed because of the VLAN locking rules (rcu || rtnl) */ @@ -441,11 +449,6 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); goto done; } - af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); - if (!af) { - rcu_read_unlock(); - goto nla_put_failure; - } if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) err = br_fill_ifvlaninfo_compressed(skb, vg); else @@ -456,32 +459,25 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); if (err) goto nla_put_failure; - - nla_nest_end(skb, af); } if (filter_mask & RTEXT_FILTER_MRP) { - struct nlattr *af; int err; if (!br_mrp_enabled(br) || port) goto done; - af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); - if (!af) - goto nla_put_failure; - rcu_read_lock(); err = br_mrp_fill_info(skb, br); rcu_read_unlock(); if (err) goto nla_put_failure; - - nla_nest_end(skb, af); } done: + if (af) + nla_nest_end(skb, af); nlmsg_end(skb, nlh); return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 61c94cefa843..ee8780080be5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1360,7 +1360,7 @@ static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) } static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, - __always_unused void *data) + __always_unused struct netdev_nested_priv *priv) { return br_vlan_is_bind_vlan_dev(dev); } @@ -1383,9 +1383,9 @@ struct br_vlan_bind_walk_data { }; static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_bind_walk_data *data = data_in; + struct br_vlan_bind_walk_data *data = priv->data; int found = 0; if (br_vlan_is_bind_vlan_dev(dev) && @@ -1403,10 +1403,13 @@ br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) struct br_vlan_bind_walk_data data = { .vid = vid, }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, - &data); + &priv); rcu_read_unlock(); return data.result; @@ -1487,9 +1490,9 @@ struct br_vlan_link_state_walk_data { }; static int br_vlan_link_state_change_fn(struct net_device *vlan_dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_link_state_walk_data *data = data_in; + struct br_vlan_link_state_walk_data *data = priv->data; if (br_vlan_is_bind_vlan_dev(vlan_dev)) br_vlan_set_vlan_dev_state(data->br, vlan_dev); @@ -1503,10 +1506,13 @@ static void br_vlan_link_state_change(struct net_device *dev, struct br_vlan_link_state_walk_data data = { .br = br }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn, - &data); + &priv); rcu_read_unlock(); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index bdfd66ba3843..d4d7a0e52491 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (page_count(page) >= 1 && !PageSlab(page)) + if (sendpage_ok(page)) sendpage = sock->ops->sendpage; else sendpage = sock_no_sendpage; diff --git a/net/core/dev.c b/net/core/dev.c index 266073e300b5..4906b44af850 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6812,9 +6812,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, + struct netdev_nested_priv *priv) { - struct net_device *dev = data; + struct net_device *dev = (struct net_device *)priv->data; return upper_dev == dev; } @@ -6831,10 +6832,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6851,8 +6856,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6997,8 +7006,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, static int __netdev_walk_all_upper_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7010,7 +7019,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7046,8 +7055,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7058,7 +7067,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7094,10 +7103,15 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } /** @@ -7215,8 +7229,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev, int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7227,7 +7241,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7262,8 +7276,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); static int __netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7275,7 +7289,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7364,22 +7378,34 @@ static u8 __netdev_lower_depth(struct net_device *dev) return max_depth; } -static int __netdev_update_upper_level(struct net_device *dev, void *data) +static int __netdev_update_upper_level(struct net_device *dev, + struct netdev_nested_priv *__unused) { dev->upper_level = __netdev_upper_depth(dev) + 1; return 0; } -static int __netdev_update_lower_level(struct net_device *dev, void *data) +static int __netdev_update_lower_level(struct net_device *dev, + struct netdev_nested_priv *priv) { dev->lower_level = __netdev_lower_depth(dev) + 1; + +#ifdef CONFIG_LOCKDEP + if (!priv) + return 0; + + if (priv->flags & NESTED_SYNC_IMM) + dev->nested_level = dev->lower_level - 1; + if (priv->flags & NESTED_SYNC_TODO) + net_unlink_todo(dev); +#endif return 0; } int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7390,7 +7416,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7650,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, + struct netdev_nested_priv *priv, struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { @@ -7706,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); return 0; @@ -7733,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, false, - NULL, NULL, extack); + NULL, NULL, &priv, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -7757,21 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info, extack); + upper_priv, upper_info, &priv, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -/** - * netdev_upper_dev_unlink - Removes a link to upper device - * @dev: device - * @upper_dev: new upper device - * - * Removes a link to device which is upper to this one. The caller must hold - * the RTNL lock. - */ -void netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev, + struct netdev_nested_priv *priv) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { @@ -7796,9 +7826,28 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); +} + +/** + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device + * + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. + */ +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_TODO, + .data = NULL, + }; + + __netdev_upper_dev_unlink(dev, upper_dev, &priv); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -7834,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; int err; if (!new_dev) @@ -7841,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, if (old_dev && new_dev != old_dev) netdev_adjacent_dev_disable(dev, old_dev); - - err = netdev_upper_dev_link(new_dev, dev, extack); + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, + extack); if (err) { if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); @@ -7857,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + if (!new_dev || !old_dev) return; @@ -7864,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, return; netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(old_dev, dev); + __netdev_upper_dev_unlink(old_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_commit); @@ -7872,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; + if (!new_dev) return; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(new_dev, dev); + __netdev_upper_dev_unlink(new_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_abort); @@ -10062,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev) void netdev_run_todo(void) { struct list_head list; +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; + + list_replace_init(&net_unlink_list, &unlink_list); + + while (!list_empty(&unlink_list)) { + struct net_device *dev = list_first_entry(&unlink_list, + struct net_device, + unlink_list); + list_del(&dev->unlink_list); + dev->nested_level = dev->lower_level - 1; + } +#endif /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); @@ -10274,6 +10350,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; +#ifdef CONFIG_LOCKDEP + dev->nested_level = 0; + INIT_LIST_HEAD(&dev->unlink_list); +#endif INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 54cd568e7c2f..fa1c37ec40c9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) * larger. */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/core/filter.c b/net/core/filter.c index 21eaf3b182f2..b5f3faac5e3b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9558,6 +9558,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { + /* BTF types for tcp_timewait_sock and inet_timewait_sock are not + * generated if CONFIG_INET=n. Trigger an explicit generation here. + */ + BTF_TYPE_EMIT(struct inet_timewait_sock); + BTF_TYPE_EMIT(struct tcp_timewait_sock); + #ifdef CONFIG_INET if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6faf73d6a0f7..2b48cb0cc684 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5622,7 +5622,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); - if (ethernet) + if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; @@ -5662,7 +5662,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); - if (ethernet) { + if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5c2072765be7..0c3f54baec4e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -866,7 +866,7 @@ static const struct genl_multicast_group ethtool_nl_mcgrps[] = { [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME }, }; -static struct genl_family ethtool_genl_family = { +static struct genl_family ethtool_genl_family __ro_after_init = { .name = ETHTOOL_GENL_NAME, .version = ETHTOOL_GENL_VERSION, .netnsok = true, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 49daaed89764..f687abb069fa 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -490,6 +490,7 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .priority = 0, }; +#if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .handler = vti_rcv_tunnel, .cb_handler = vti_rcv_cb, @@ -497,6 +498,7 @@ static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .priority = 0, }; #endif +#endif static int __net_init vti_init_net(struct net *net) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1074df726ec0..8d5e1695b9aa 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -293,6 +293,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), + SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f0794f0232ba..e03756631541 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,7 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { - refcount_set(&req->rsk_refcnt, 2); + reqsk_put(req); return child; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 31f3b858db81..2135ee7c806d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (IS_ENABLED(CONFIG_DEBUG_VM) && - WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + WARN_ONCE(!sendpage_ok(page), + "page must not be a Slab one and have page_count > 0")) return -EINVAL; /* Wait for a connection to finish. One exception is TCP Fast Open diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 184ea556f50e..b1ce2054291d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -885,21 +885,34 @@ struct tcp_sacktag_state { struct rate_sample *rate; }; -/* Take a notice that peer is sending D-SACKs */ +/* Take a notice that peer is sending D-SACKs. Skip update of data delivery + * and spurious retransmission information if this DSACK is unlikely caused by + * sender's action: + * - DSACKed sequence range is larger than maximum receiver's window. + * - Total no. of DSACKed segments exceed the total no. of retransmitted segs. + */ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, u32 end_seq, struct tcp_sacktag_state *state) { u32 seq_len, dup_segs = 1; - if (before(start_seq, end_seq)) { - seq_len = end_seq - start_seq; - if (seq_len > tp->mss_cache) - dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); - } + if (!before(start_seq, end_seq)) + return 0; + + seq_len = end_seq - start_seq; + /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */ + if (seq_len > tp->max_window) + return 0; + if (seq_len > tp->mss_cache) + dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + + tp->dsack_dups += dup_segs; + /* Skip the DSACK if dup segs weren't retransmitted by sender */ + if (tp->dsack_dups > tp->total_retrans) + return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; tp->rack.dsack_seen = 1; - tp->dsack_dups += dup_segs; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ @@ -1153,6 +1166,11 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); + if (!dup_segs) { /* Skip dubious DSACK */ + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS); + return false; + } + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs); /* D-SACK for already forgotten data... Do dumb counting. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5084333b5ab6..592c73962723 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1788,12 +1788,12 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, hdrlen); if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { - thtail->window = th->window; - TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; - if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) + if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) { TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + thtail->window = th->window; + } /* We have to update both TCP_SKB_CB(tail)->tcp_flags and * thtail->fin, so that the fast path in tcp_rcv_established() diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 7fa822b55c34..888bbbbb3e8a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -451,7 +451,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_ext *ext) { - u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq); + /* The write_seq value has already been incremented, so the actual + * sequence number for the DATA_FIN is one less. + */ + u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq) - 1; if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values @@ -460,10 +463,7 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; - /* The write_seq value has already been incremented, so - * the actual sequence number for the DATA_FIN is one less. - */ - ext->data_seq = data_fin_tx_seq - 1; + ext->data_seq = data_fin_tx_seq; ext->subflow_seq = 0; ext->data_len = 1; } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) { @@ -518,11 +518,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (subflow->use_64bit_ack) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -782,7 +782,7 @@ static void update_una(struct mptcp_sock *msk, } } -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) { /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values @@ -792,7 +792,8 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first)) return false; - WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq); + WRITE_ONCE(msk->rcv_data_fin_seq, + expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; @@ -875,7 +876,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) && + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && schedule_work(&msk->work)) sock_hold(subflow->conn); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 365ba96c84b0..5d747c6a610e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -123,7 +123,7 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_ext_reset(skb); skb_orphan(skb); - msk->ack_seq += copy_len; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (offset == 0 && tail) { @@ -261,7 +261,7 @@ static void mptcp_check_data_fin(struct sock *sk) if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) { struct mptcp_subflow_context *subflow; - msk->ack_seq++; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); sk->sk_shutdown |= RCV_SHUTDOWN; @@ -1720,7 +1720,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; - msk->ack_seq = ack_seq; + WRITE_ONCE(msk->ack_seq, ack_seq); } sock_reset_flag(nsk, SOCK_RCU_FREE); @@ -2072,7 +2072,7 @@ bool mptcp_finish_join(struct sock *sk) parent_sock = READ_ONCE(parent->sk_socket); if (parent_sock && !sk->sk_socket) mptcp_sock_graft(sk, parent_sock); - subflow->map_seq = msk->ack_seq; + subflow->map_seq = READ_ONCE(msk->ack_seq); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60b27d44c184..20f04ac85409 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -387,7 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq); +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9ead43f79023..6f035af1c9d2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -731,7 +731,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (mpext->data_fin == 1) { if (data_len == 1) { - mptcp_update_rcv_data_fin(msk, mpext->data_seq); + bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, + mpext->dsn64); pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS @@ -742,11 +743,23 @@ static enum mapping_status get_mapping_status(struct sock *ssk, skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } else { + if (updated && schedule_work(&msk->work)) + sock_hold((struct sock *)msk); + return MAPPING_DATA_FIN; } } else { - mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len); - pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len); + u64 data_fin_seq = mpext->data_seq + data_len - 1; + + /* If mpext->data_seq is a 32-bit value, data_fin_seq + * must also be limited to 32 bits. + */ + if (!mpext->dsn64) + data_fin_seq &= GENMASK_ULL(31, 0); + + mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + data_fin_seq, mpext->dsn64); } /* Adjust for DATA_FIN using 1 byte of sequence space */ diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1eb65a7a27fd..c4b4d3376227 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - while (netlink_policy_dump_loop(&cb->args[1])) { + while (netlink_policy_dump_loop(cb->args[1])) { void *hdr; struct nlattr *nest; @@ -1113,6 +1113,12 @@ nla_put_failure: return skb->len; } +static int ctrl_dumppolicy_done(struct netlink_callback *cb) +{ + netlink_policy_dump_free(cb->args[1]); + return 0; +} + static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, @@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETPOLICY, .dumpit = ctrl_dumppolicy, + .done = ctrl_dumppolicy_done, }, }; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 641ffbdd977a..0176b59ce530 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -84,7 +84,6 @@ int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int policy_idx; int err; - /* also returns 0 if "*_state" is our ERR_PTR() end marker */ if (*_state) return 0; @@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished(struct nl_policy_dump *state) !state->policies[state->policy_idx].policy; } -bool netlink_policy_dump_loop(unsigned long *_state) +bool netlink_policy_dump_loop(unsigned long _state) { - struct nl_policy_dump *state = (void *)*_state; - - if (IS_ERR(state)) - return false; - - if (netlink_policy_dump_finished(state)) { - kfree(state); - /* store end marker instead of freed state */ - *_state = (unsigned long)ERR_PTR(-ENOENT); - return false; - } + struct nl_policy_dump *state = (void *)_state; - return true; + return !netlink_policy_dump_finished(state); } int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state) @@ -309,3 +298,10 @@ nla_put_failure: nla_nest_cancel(skb, policy); return -ENOBUFS; } + +void netlink_policy_dump_free(unsigned long _state) +{ + struct nl_policy_dump *state = (void *)_state; + + kfree(state); +} diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a3f1204f1ed2..12d42ab0193b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -905,15 +905,19 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); - if (err == NF_ACCEPT && - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, + NF_NAT_MANIP_SRC); + } } /* Mark NAT done if successful and update the flow key. */ diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index d8252fdab851..b8559c882431 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -199,17 +199,30 @@ static int announce_servers(struct sockaddr_qrtr *sq) if (!node) return 0; + rcu_read_lock(); /* Announce the list of servers registered in this node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); ret = service_announce_new(sq, srv); if (ret < 0) { pr_err("failed to announce new service\n"); return ret; } + + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -344,11 +357,22 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) if (!node) return 0; + rcu_read_lock(); /* Advertise removal of this client to all servers of remote node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); server_del(node, srv->port); + rcu_read_lock(); } + rcu_read_unlock(); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -359,8 +383,17 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); pkt.client.node = cpu_to_le32(from->sq_node); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; @@ -374,8 +407,11 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) pr_err("failed to send bye cmd\n"); return ret; } + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -434,8 +470,17 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, pkt.client.node = cpu_to_le32(node_id); pkt.client.port = cpu_to_le32(port); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + slot = radix_tree_iter_resume(slot, &iter); + rcu_read_unlock(); sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; @@ -449,8 +494,11 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, pr_err("failed to send del client cmd\n"); return ret; } + rcu_read_lock(); } + rcu_read_unlock(); + return 0; } @@ -554,20 +602,40 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, filter.service = service; filter.instance = instance; + rcu_read_lock(); radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) { node = radix_tree_deref_slot(node_slot); + if (!node) + continue; + if (radix_tree_deref_retry(node)) { + node_slot = radix_tree_iter_retry(&node_iter); + continue; + } + node_slot = radix_tree_iter_resume(node_slot, &node_iter); radix_tree_for_each_slot(srv_slot, &node->servers, &srv_iter, 0) { struct qrtr_server *srv; srv = radix_tree_deref_slot(srv_slot); + if (!srv) + continue; + if (radix_tree_deref_retry(srv)) { + srv_slot = radix_tree_iter_retry(&srv_iter); + continue; + } + if (!server_match(srv, &filter)) continue; + srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter); + + rcu_read_unlock(); lookup_notify(from, srv, true); + rcu_read_lock(); } } + rcu_read_unlock(); /* Empty notification, to indicate end of listing */ lookup_notify(from, NULL, true); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 884cff7bb169..97aebb5d19db 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -518,7 +518,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ - RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ @@ -714,8 +713,8 @@ struct rxrpc_ack_summary { enum rxrpc_command { RXRPC_CMD_SEND_DATA, /* send data message */ RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ + RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */ }; struct rxrpc_call_params { @@ -755,9 +754,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_sock *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_reject_call(struct rxrpc_sock *); +int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); /* * call_event.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index ef160566aa9a..8df1964db333 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -39,8 +39,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, unsigned int debug_id) { const void *here = __builtin_return_address(0); - struct rxrpc_call *call; + struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; @@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, } /* Now it gets complicated, because calls get registered with the - * socket here, particularly if a user ID is preassigned by the user. + * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) @@ -107,34 +108,33 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, here, (const void *)user_call_ID); write_lock(&rx->call_lock); - if (user_attach_call) { - struct rxrpc_call *xcall; - struct rb_node *parent, **pp; - - /* Check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - call->user_call_ID = user_call_ID; - call->notify_rx = notify_rx; + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < xcall->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > xcall->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + if (user_attach_call) { rxrpc_get_call(call, rxrpc_call_got_kernel); user_attach_call(call, user_call_ID); - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); @@ -157,11 +157,8 @@ id_in_use: } /* - * Preallocate sufficient service connections, calls and peers to cover the - * entire backlog of a socket. When a new call comes in, if we don't have - * sufficient of each available, the call gets rejected as busy or ignored. - * - * The backlog is replenished when a connection is accepted or rejected. + * Allocate the preallocation buffers for incoming service calls. These must + * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { @@ -174,13 +171,6 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) rx->backlog = b; } - if (rx->discard_new_call) - return 0; - - while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp, - atomic_inc_return(&rxrpc_debug_id)) == 0) - ; - return 0; } @@ -333,6 +323,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, rxrpc_see_call(call); call->conn = conn; call->security = conn->security; + call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->params.peer); call->cong_cwnd = call->peer->cong_cwnd; return call; @@ -402,8 +393,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, if (rx->notify_new_call) rx->notify_new_call(&rx->sk, call, call->user_call_ID); - else - sk_acceptq_added(&rx->sk); spin_lock(&conn->state_lock); switch (conn->state) { @@ -415,12 +404,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_SERVICE: write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - if (rx->discard_new_call) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - else - call->state = RXRPC_CALL_SERVER_ACCEPTING; - } + if (call->state < RXRPC_CALL_COMPLETE) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; write_unlock(&call->state_lock); break; @@ -440,9 +425,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rxrpc_send_ping(call, skb); - if (call->state == RXRPC_CALL_SERVER_ACCEPTING) - rxrpc_notify_socket(call); - /* We have to discard the prealloc queue's ref here and rely on a * combination of the RCU read lock and refs held either by the socket * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel @@ -460,187 +442,18 @@ no_call: } /* - * handle acceptance of a call by userspace - * - assign the user call ID to the call at the front of the queue - * - called with the socket locked. + * Charge up socket with preallocated calls, attaching user call IDs. */ -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) - __releases(&rx->sk.sk_lock.slock) - __acquires(call->user_mutex) +int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { - struct rxrpc_call *call; - struct rb_node *parent, **pp; - int ret; - - _enter(",%lx", user_call_ID); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - release_sock(&rx->sk); - kleave(" = -ENODATA [empty]"); - return ERR_PTR(-ENODATA); - } - - /* check the user ID isn't already in use */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - goto id_in_use; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - write_unlock(&rx->call_lock); - - /* We need to gain the mutex from the interrupt handler without - * upsetting lockdep, so we have to release it there and take it here. - * We are, however, still holding the socket lock, so other accepts - * must wait for us and no one can add the user ID behind our backs. - */ - if (mutex_lock_interruptible(&call->user_mutex) < 0) { - release_sock(&rx->sk); - kleave(" = -ERESTARTSYS"); - return ERR_PTR(-ERESTARTSYS); - } - - write_lock(&rx->call_lock); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); - - /* Find the user ID insertion point. */ - pp = &rx->calls.rb_node; - parent = NULL; - while (*pp) { - parent = *pp; - call = rb_entry(parent, struct rxrpc_call, sock_node); - - if (user_call_ID < call->user_call_ID) - pp = &(*pp)->rb_left; - else if (user_call_ID > call->user_call_ID) - pp = &(*pp)->rb_right; - else - BUG(); - } - - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - break; - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_release; - default: - BUG(); - } - - /* formalise the acceptance */ - call->notify_rx = notify_rx; - call->user_call_ID = user_call_ID; - rxrpc_get_call(call, rxrpc_call_got_userid); - rb_link_node(&call->sock_node, parent, pp); - rb_insert_color(&call->sock_node, &rx->calls); - if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) - BUG(); - - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_notify_socket(call); - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %p{%d}", call, call->debug_id); - return call; - -out_release: - _debug("release %p", call); - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - goto out; - -id_in_use: - ret = -EBADSLT; - write_unlock(&rx->call_lock); -out: - rxrpc_service_prealloc(rx, GFP_KERNEL); - release_sock(&rx->sk); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * Handle rejection of a call by userspace - * - reject the call at the front of the queue - */ -int rxrpc_reject_call(struct rxrpc_sock *rx) -{ - struct rxrpc_call *call; - bool abort = false; - int ret; - - _enter(""); - - ASSERT(!irqs_disabled()); - - write_lock(&rx->call_lock); - - if (list_empty(&rx->to_be_accepted)) { - write_unlock(&rx->call_lock); - return -ENODATA; - } - - /* Dequeue the first call and check it's still valid. We gain - * responsibility for the queue's reference. - */ - call = list_entry(rx->to_be_accepted.next, - struct rxrpc_call, accept_link); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - rxrpc_see_call(call); + struct rxrpc_backlog *b = rx->backlog; - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); - abort = true; - fallthrough; - case RXRPC_CALL_COMPLETE: - ret = call->error; - goto out_discard; - default: - BUG(); - } + if (rx->sk.sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; -out_discard: - write_unlock_bh(&call->state_lock); - write_unlock(&rx->call_lock); - if (abort) { - rxrpc_send_abort_packet(call); - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - } - rxrpc_service_prealloc(rx, GFP_KERNEL); - _leave(" = %d", ret); - return ret; + return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, + GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); } /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a40fae013942..ed49769b459d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -23,7 +23,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", - [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -352,9 +351,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->service_id = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_ACCEPTING; - if (sp->hdr.securityIndex > 0) - call->state = RXRPC_CALL_SERVER_SECURING; + call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; /* Set the channel for this call. We don't get channel_lock as we're diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 447f55ca6886..64ace2960ecc 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -269,7 +269,7 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) if (call) { write_lock_bh(&call->state_lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_ACCEPTING; + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; rxrpc_notify_socket(call); } write_unlock_bh(&call->state_lock); @@ -340,18 +340,18 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; spin_lock(&conn->channel_lock); - spin_lock(&conn->state_lock); + spin_lock_bh(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); } else { - spin_unlock(&conn->state_lock); + spin_unlock_bh(&conn->state_lock); } spin_unlock(&conn->channel_lock); diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 94c3df392651..2e8bd3b97301 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -903,7 +903,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _enter(""); - if (optlen <= 0 || optlen > PAGE_SIZE - 1) + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) return -EINVAL; description = memdup_sockptr_nul(optval, optlen); @@ -940,7 +940,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) if (IS_ERR(description)) return PTR_ERR(description); - key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk), NULL); + key = request_key(&key_type_keyring, description, NULL); if (IS_ERR(key)) { kfree(description); _leave(" = %ld", PTR_ERR(key)); @@ -1072,7 +1072,7 @@ static long rxrpc_read(const struct key *key, switch (token->security_index) { case RXRPC_SECURITY_RXKAD: - toksize += 9 * 4; /* viceid, kvno, key*2 + len, begin, + toksize += 8 * 4; /* viceid, kvno, key*2, begin, * end, primary, tktlen */ toksize += RND(token->kad->ticket_len); break; @@ -1107,7 +1107,8 @@ static long rxrpc_read(const struct key *key, break; default: /* we have a ticket we can't encode */ - BUG(); + pr_err("Unsupported key token type (%u)\n", + token->security_index); continue; } @@ -1138,6 +1139,14 @@ static long rxrpc_read(const struct key *key, memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ xdr += (_l + 3) >> 2; \ } while(0) +#define ENCODE_BYTES(l, s) \ + do { \ + u32 _l = (l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ + xdr += (_l + 3) >> 2; \ + } while(0) #define ENCODE64(x) \ do { \ __be64 y = cpu_to_be64(x); \ @@ -1165,7 +1174,7 @@ static long rxrpc_read(const struct key *key, case RXRPC_SECURITY_RXKAD: ENCODE(token->kad->vice_id); ENCODE(token->kad->kvno); - ENCODE_DATA(8, token->kad->session_key); + ENCODE_BYTES(8, token->kad->session_key); ENCODE(token->kad->start); ENCODE(token->kad->expiry); ENCODE(token->kad->primary_flag); @@ -1215,7 +1224,6 @@ static long rxrpc_read(const struct key *key, break; default: - BUG(); break; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c4684dde1f16..2c842851d72e 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -179,37 +179,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } /* - * Pass back notification of a new call. The call is added to the - * to-be-accepted list. This means that the next call to be accepted might not - * be the last call seen awaiting acceptance, but unless we leave this on the - * front of the queue and block all other messages until someone gives us a - * user_ID for it, there's not a lot we can do. - */ -static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, int flags) -{ - int tmp = 0, ret; - - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - - if (ret == 0 && !(flags & MSG_PEEK)) { - _debug("to be accepted"); - write_lock_bh(&rx->recvmsg_lock); - list_del_init(&call->recvmsg_link); - write_unlock_bh(&rx->recvmsg_lock); - - rxrpc_get_call(call, rxrpc_call_got); - write_lock(&rx->call_lock); - list_add_tail(&call->accept_link, &rx->to_be_accepted); - write_unlock(&rx->call_lock); - } - - trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); - return ret; -} - -/* * End the packet reception phase. */ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) @@ -630,9 +599,6 @@ try_again: } switch (READ_ONCE(call->state)) { - case RXRPC_CALL_SERVER_ACCEPTING: - ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); - break; case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -728,7 +694,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, call->debug_id, rxrpc_call_states[call->state], iov_iter_count(iter), want_more); - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); mutex_lock(&call->user_mutex); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0824e103d037..d27140c836cc 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -530,10 +530,10 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) return -EINVAL; break; - case RXRPC_ACCEPT: + case RXRPC_CHARGE_ACCEPT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; - p->command = RXRPC_CMD_ACCEPT; + p->command = RXRPC_CMD_CHARGE_ACCEPT; if (len != 0) return -EINVAL; break; @@ -659,16 +659,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (ret < 0) goto error_release_sock; - if (p.command == RXRPC_CMD_ACCEPT) { + if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); - /* The socket is now unlocked. */ - if (IS_ERR(call)) - return PTR_ERR(call); - ret = 0; - goto out_put_unlock; + ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); + goto error_release_sock; } call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); @@ -690,7 +686,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: - case RXRPC_CALL_SERVER_ACCEPTING: rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 063d8aaf2900..798430e1a79f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -235,6 +235,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, index++; if (index < s_i) continue; + if (IS_ERR(p)) + continue; if (jiffy_since && time_after(jiffy_since, @@ -307,6 +309,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p)) + continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); @@ -467,17 +471,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, } EXPORT_SYMBOL(tcf_idr_create_from_flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) -{ - struct tcf_idrinfo *idrinfo = tn->idrinfo; - - mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - mutex_unlock(&idrinfo->lock); -} -EXPORT_SYMBOL(tcf_idr_insert); - /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) @@ -731,13 +724,6 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) return ret; } -static int tcf_action_destroy_1(struct tc_action *a, int bind) -{ - struct tc_action *actions[] = { a, NULL }; - - return tcf_action_destroy(actions, bind); -} - static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); @@ -902,6 +888,26 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; +static void tcf_idr_insert_many(struct tc_action *actions[]) +{ + int i; + + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + struct tc_action *a = actions[i]; + struct tcf_idrinfo *idrinfo; + + if (!a) + continue; + idrinfo = a->idrinfo; + mutex_lock(&idrinfo->lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if + * it is just created, otherwise this is just a nop. + */ + idr_replace(&idrinfo->action_idr, a, a->tcfa_index); + mutex_unlock(&idrinfo->lock); + } +} + struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -1002,13 +1008,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !rcu_access_pointer(a->goto_chain)) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); - return ERR_PTR(-EINVAL); - } - return a; err_mod: @@ -1051,6 +1050,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, actions[i - 1] = act; } + /* We have to commit them all together, because if any error happened in + * between, we could not handle the failure gracefully. + */ + tcf_idr_insert_many(actions); + *attr_size = tcf_action_full_attrs_size(sz); return i - 1; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 54d5652cfe6c..a4c7ba35a343 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (res == ACT_P_CREATED) { - tcf_idr_insert(tn, *act); - } else { + if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f901421b0634..e19885d7fe2c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -139,7 +139,6 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index f5826e457679..4fa4fcb842ba 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -110,9 +110,6 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (params_new) kfree_rcu(params_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2c3619165680..a780afdf570d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1297,8 +1297,6 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, tcf_chain_put_by_act(goto_ch); if (params) call_rcu(¶ms->rcu, tcf_ct_params_free); - if (res == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return res; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b5042f3ea079..6084300e51ad 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -269,9 +269,6 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, if (cp_new) kfree_rcu(cp_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 410e3bbfb9ca..73c3926358a0 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -140,8 +140,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 1fb8d428d2c1..7c0771dd77a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -437,9 +437,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; chain_put: diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5c568757643b..a2ddea04183a 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -627,9 +627,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; metadata_parse_err: if (goto_ch) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 400a2cfe8452..8dc3bec0d325 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,8 +189,6 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_t = t; ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; err3: diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b2705318993b..e24b7e2331cd 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -194,8 +194,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); - - tcf_idr_insert(tn, *a); } return ret; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8118e2640979..e298ec3b3c9e 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -273,8 +273,6 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 855a6fa16a62..1ebd2a86d980 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -93,9 +93,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c158bfed86d5..b45304446e13 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -238,8 +238,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0b431d493768..8d8452b1cdd4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -201,8 +201,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new) kfree_rcu(new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; failure: diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 5e2df590bb58..3ebf9ede3cf1 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -116,8 +116,6 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 9813ca4006dd..a4f3d0f0daa9 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -157,8 +157,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, goto release_idr; } - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index d0652386c6e2..e5f3fb8b00e3 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -225,8 +225,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 39e6d94cfafb..81a1c67335be 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -190,8 +190,6 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 37f1e10f35e0..a229751ee8c4 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -537,9 +537,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a5ff9f68ab02..163b0385fd4c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -229,8 +229,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 9e289c770574..7e59d8a18f3e 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -494,6 +494,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) out_err: /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); + ep->auth_hmacs = NULL; return -ENOMEM; } diff --git a/net/socket.c b/net/socket.c index 0c0144604f81..58cac2da5f66 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3638,9 +3638,11 @@ EXPORT_SYMBOL(kernel_getpeername); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { - if (sock->ops->sendpage) + if (sock->ops->sendpage) { + /* Warn in case the improper page to zero-copy send */ + WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send"); return sock->ops->sendpage(sock, page, offset, size, flags); - + } return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(kernel_sendpage); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 865f3e037425..23d868545362 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data - * + * @extack: netlink extended ack * Call all network notifier blocks. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9a3d9fedd7aa..95ab5545a931 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2143,10 +2143,15 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; + int pending; /* Wait for any pending async encryptions to complete */ - smp_store_mb(ctx->async_notify, true); - if (atomic_read(&ctx->encrypt_pending)) + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + + if (pending) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); tls_tx_records(sk, -1); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2c9e9a2d1688..7fd45f6ddb05 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4172,6 +4172,9 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (key.idx < 0) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c3231620d210..6c5e09e7440a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -377,15 +377,30 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; + /* Hinder dev_direct_xmit from freeing the packet and + * therefore completing it in the destructor + */ + refcount_inc(&skb->users); err = dev_direct_xmit(skb, xs->queue_id); + if (err == NETDEV_TX_BUSY) { + /* Tell user-space to retry the send */ + skb->destructor = sock_wfree; + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + err = -EAGAIN; + goto out; + } + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ - if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { + if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ + kfree_skb(skb); err = -EBUSY; goto out; } + consume_skb(skb); sent_frame = true; } diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 827ccdf2db57..1f08ebf7d80c 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -29,8 +29,12 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, static void handle_esp(struct sk_buff *skb, struct sock *sk) { + struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb; + skb_reset_transport_header(skb); - memset(skb->cb, 0, sizeof(skb->cb)); + + /* restore IP CB, we need at least IP6CB->nhoff */ + memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header)); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index eb8181987620..a8f66112c52b 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -303,7 +303,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 69520ad3d83b..efc89a92961d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1019,7 +1019,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, */ if (x->km.state == XFRM_STATE_VALID) { if ((x->sel.family && - !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + (x->sel.family != family || + !xfrm_selector_match(&x->sel, fl, family))) || !security_xfrm_state_pol_flow_match(x, pol, fl)) return; @@ -1032,7 +1033,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, *acq_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + if ((!x->sel.family || + (x->sel.family == family && + xfrm_selector_match(&x->sel, fl, family))) && security_xfrm_state_pol_flow_match(x, pol, fl)) *error = -ESRCH; } @@ -1072,7 +1075,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } if (best || acquire_in_progress) @@ -1089,7 +1092,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } @@ -1441,6 +1444,30 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE +static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security) +{ + struct xfrm_user_sec_ctx *uctx; + int size = sizeof(*uctx) + security->ctx_len; + int err; + + uctx = kmalloc(size, GFP_KERNEL); + if (!uctx) + return -ENOMEM; + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = size; + uctx->ctx_doi = security->ctx_doi; + uctx->ctx_alg = security->ctx_alg; + uctx->ctx_len = security->ctx_len; + memcpy(uctx + 1, security->ctx_str, security->ctx_len); + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + if (err) + return err; + + return 0; +} + static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, struct xfrm_encap_tmpl *encap) { @@ -1497,6 +1524,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, goto error; } + if (orig->security) + if (clone_security(x, orig->security)) + goto error; + if (orig->coaddr) { x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), GFP_KERNEL); @@ -1510,6 +1541,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, } memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); if (xfrm_init_state(x) < 0) goto error; @@ -1521,7 +1553,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; x->replay_maxage = orig->replay_maxage; - x->curlft.add_time = orig->curlft.add_time; + memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft)); x->km.state = orig->km.state; x->km.seq = orig->km.seq; x->replay = orig->replay; diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..4828913703b6 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -25,7 +25,7 @@ endif LIBBPF = $(LIBBPF_PATH)libbpf.a -BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) $(LIBBPF): FORCE $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7dfca7016aaa..6bdbc389b493 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -659,6 +659,12 @@ struct btf *btf__parse_raw(const char *path) err = -EIO; goto err_out; } + if (magic == __bswap_16(BTF_MAGIC)) { + /* non-native endian raw BTF */ + pr_warn("non-native BTF endianness is not supported\n"); + err = -LIBBPF_ERRNO__ENDIAN; + goto err_out; + } if (magic != BTF_MAGIC) { /* definitely not a raw BTF */ err = -EPROTO; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7253b833576c..e493d6048143 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6925,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), - BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, BPF_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), |