From 36d763509be326bb383b1b1852a129ff58d74e3b Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 27 Jul 2022 17:40:53 +0200 Subject: xfrm: fix XFRMA_LASTUSED comment It is a __u64, internally time64_t. Fixes: bf825f81b454 ("xfrm: introduce basic mark infrastructure") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 65e13a099b1a..a9f5d884560a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -296,7 +296,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ - XFRMA_LASTUSED, /* unsigned long */ + XFRMA_LASTUSED, /* __u64 */ XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ -- cgit v1.2.3 From 0b2f3212b551a87fe936701fa0813032861a3308 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Aug 2022 10:59:57 +0200 Subject: netfilter: nfnetlink: re-enable conntrack expectation events To avoid allocation of the conntrack extension area when possible, the default behaviour was changed to only allocate the event extension if a userspace program is subscribed to a notification group. Problem is that while 'conntrack -E' does enable the event allocation behind the scenes, 'conntrack -E expect' does not: no expectation events are delivered unless user sets "net.netfilter.nf_conntrack_events" back to 1 (always on). Fix the autodetection to also consider EXP type group. We need to track the 6 event groups (3+3, new/update/destroy for events and for expectations each) independently, else we'd disable events again if an expectation group becomes empty while there is still an active event group. Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") Reported-by: Yi Chen Signed-off-by: Florian Westphal --- include/net/netns/conntrack.h | 2 +- net/netfilter/nfnetlink.c | 83 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0677cd3de034..c396a3862e80 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -95,7 +95,7 @@ struct nf_ip_net { struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS - bool ctnetlink_has_listener; + u8 ctnetlink_has_listener; bool ecache_dwork_pending; #endif u8 sysctl_log_invalid; /* Log invalid packets */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c24b1240908f..9c44518cb70f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static DEFINE_SPINLOCK(nfnl_grp_active_lock); +#endif + struct nfnl_net { struct sock *nfnl; }; @@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb) netlink_rcv_skb(skb, nfnetlink_rcv_msg); } +static void nfnetlink_bind_event(struct net *net, unsigned int group) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + u8 v; + + /* All NFNLGRP_CONNTRACK_* group bits fit into u8. + * The other groups are not relevant and can be ignored. + */ + if (group >= 8) + return; + + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + v = READ_ONCE(net->ct.ctnetlink_has_listener); + if ((v & group_bit) == 0) { + v |= group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); + } + + spin_unlock(&nfnl_grp_active_lock); +#endif +} + static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; @@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group) if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - if (type == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - } -#endif + nfnetlink_bind_event(net, group); return 0; } static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS + int type, group_bit; + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return; - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - if (!nfnetlink_has_listeners(net, group)) - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); + type = nfnl_group2type[group]; + + switch (type) { + case NFNL_SUBSYS_CTNETLINK: + break; + case NFNL_SUBSYS_CTNETLINK_EXP: + break; + default: + return; + } + + /* ctnetlink_has_listener is u8 */ + if (group >= 8) + return; + + group_bit = (1 << group); + + spin_lock(&nfnl_grp_active_lock); + if (!nfnetlink_has_listeners(net, group)) { + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); + + v &= ~group_bit; + + /* read concurrently without nfnl_grp_active_lock held. */ + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); } + spin_unlock(&nfnl_grp_active_lock); #endif } -- cgit v1.2.3 From 6d17a112e9a63ff6a5edffd1676b99e0ffbcd269 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Tue, 2 Aug 2022 10:42:31 +0900 Subject: scsi: ufs: core: Enable link lost interrupt Link lost is treated as fatal error with commit c99b9b230149 ("scsi: ufs: Treat link loss as fatal error"), but the event isn't registered as interrupt source. Enable it. Link: https://lore.kernel.org/r/1659404551-160958-1-git-send-email-kwmad.kim@samsung.com Fixes: c99b9b230149 ("scsi: ufs: Treat link loss as fatal error") Reviewed-by: Bart Van Assche Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index f81aa95ffbc4..f525566a0864 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK) -#define UFSHCD_ERROR_MASK (UIC_ERROR |\ - DEVICE_FATAL_ERROR |\ - CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) +#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ -- cgit v1.2.3 From 67f4b5dc49913abcdb5cc736e73674e2f352f81d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:22:25 -0400 Subject: NFS: Fix another fsync() issue after a server reboot Currently, when the writeback code detects a server reboot, it redirties any pages that were not committed to disk, and it sets the flag NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor that dirtied the file. While this allows the file descriptor in question to redrive its own writes, it violates the fsync() requirement that we should be synchronising all writes to disk. While the problem is infrequent, we do see corner cases where an untimely server reboot causes the fsync() call to abandon its attempt to sync data to disk and causing data corruption issues due to missed error conditions or similar. In order to tighted up the client's ability to deal with this situation without introducing livelocks, add a counter that records the number of times pages are redirtied due to a server reboot-like condition, and use that in fsync() to redrive the sync to disk. Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++++++--------- fs/nfs/inode.c | 1 + fs/nfs/write.c | 6 ++++-- include/linux/nfs_fs.h | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 54237a231687..749e5487df50 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + struct nfs_inode *nfsi = NFS_I(inode); + long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); + long nredirtied; int ret; trace_nfs_fsync_enter(inode); @@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = pnfs_sync_inode(inode, !!datasync); if (ret != 0) break; - if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + nredirtied = atomic_long_read(&nfsi->redirtied_pages); + if (nredirtied == save_nredirtied) break; - /* - * If nfs_file_fsync_commit detected a server reboot, then - * resend all dirty pages that might have been covered by - * the NFS_CONTEXT_RESEND_WRITES flag - */ - start = 0; - end = LLONG_MAX; + save_nredirtied = nredirtied; } trace_nfs_fsync_exit(inode, ret); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4e46b0ffa2d..bea7c005119c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4a3796811b4b..989c734cf91d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); + /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&nfsi->redirtied_pages); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); next: nfs_unlock_and_release_request(req); /* Latency breaker */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b32ed68e7dc4..f08e581f0161 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -182,6 +182,7 @@ struct nfs_inode { /* Regular file */ struct { atomic_long_t nrequests; + atomic_long_t redirtied_pages; struct nfs_mds_commit_info commit_info; struct mutex commit_mutex; }; -- cgit v1.2.3 From 5f6277a0c15e1ea54b6fd3d78c9fff7bfe42556c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:51:45 -0400 Subject: NFS: Cleanup to remove unused flag NFS_CONTEXT_RESEND_WRITES Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f08e581f0161..7931fa472561 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,7 +83,6 @@ struct nfs_open_context { fmode_t mode; unsigned long flags; -#define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) -- cgit v1.2.3 From 0ff4eb3d5ebbf72a7fc355e6001a0a6740662bf9 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 11 Aug 2022 18:20:12 +0300 Subject: neighbour: make proxy_queue.qlen limit per-device Right now we have a neigh_param PROXY_QLEN which specifies maximum length of neigh_table->proxy_queue. But in fact, this limitation doesn't work well because check condition looks like: tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN) The problem is that p (struct neigh_parms) is a per-device thing, but tbl (struct neigh_table) is a system-wide global thing. It seems reasonable to make proxy_queue limit per-device based. v2: - nothing changed in this patch v3: - rebase to net tree Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Daniel Borkmann Cc: David Ahern Cc: Yajun Deng Cc: Roopa Prabhu Cc: Christian Brauner Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Alexey Kuznetsov Cc: Alexander Mikhalitsyn Cc: Konstantin Khorenko Cc: kernel@openvz.org Cc: devel@openvz.org Suggested-by: Denis V. Lunev Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + net/core/neighbour.c | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9f0bab0589d9..3827a6b395fd 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,6 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; + int qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0e38a05d5b23..5b669eb80270 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,9 +316,18 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) skb = skb_peek(list); while (skb != NULL) { struct sk_buff *skb_next = skb_peek_next(skb, list); - if (net == NULL || net_eq(dev_net(skb->dev), net)) { + struct net_device *dev = skb->dev; + if (net == NULL || net_eq(dev_net(dev), net)) { + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); __skb_unlink(skb, list); - dev_put(skb->dev); + + dev_put(dev); kfree_skb(skb); } skb = skb_next; @@ -1606,8 +1615,15 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + in_dev->arp_parms->qlen--; + rcu_read_unlock(); __skb_unlink(skb, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) { rcu_read_lock(); tbl->proxy_redo(skb); @@ -1632,7 +1648,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, unsigned long sched_next = jiffies + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); - if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { + if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1648,6 +1664,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); + p->qlen++; mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } @@ -1680,6 +1697,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, refcount_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + p->qlen = 0; netdev_hold(dev, &p->dev_tracker, GFP_KERNEL); p->dev = dev; write_pnet(&p->net, net); @@ -1745,6 +1763,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) refcount_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); + tbl->parms.qlen = 0; tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) -- cgit v1.2.3 From 0a90ed8d0cfa29735a221eba14d9cb6c735d35b6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 1 Aug 2022 14:37:31 +0300 Subject: platform/x86: pmc_atom: Fix SLP_TYPx bitfield mask On Intel hardware the SLP_TYPx bitfield occupies bits 10-12 as per ACPI specification (see Table 4.13 "PM1 Control Registers Fixed Hardware Feature Control Bits" for the details). Fix the mask and other related definitions accordingly. Fixes: 93e5eadd1f6e ("x86/platform: New Intel Atom SOC power management controller driver") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220801113734.36131-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/pmc_atom.c | 2 +- include/linux/platform_data/x86/pmc_atom.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 154317e9910d..5c757c7f64de 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -232,7 +232,7 @@ static void pmc_power_off(void) pm1_cnt_port = acpi_base_addr + PM1_CNT; pm1_cnt_value = inl(pm1_cnt_port); - pm1_cnt_value &= SLEEP_TYPE_MASK; + pm1_cnt_value &= ~SLEEP_TYPE_MASK; pm1_cnt_value |= SLEEP_TYPE_S5; pm1_cnt_value |= SLEEP_ENABLE; diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index 3edfb6d4e67a..dd81f510e4cf 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -7,6 +7,8 @@ #ifndef PMC_ATOM_H #define PMC_ATOM_H +#include + /* ValleyView Power Control Unit PCI Device ID */ #define PCI_DEVICE_ID_VLV_PMC 0x0F1C /* CherryTrail Power Control Unit PCI Device ID */ @@ -139,9 +141,9 @@ #define ACPI_MMIO_REG_LEN 0x100 #define PM1_CNT 0x4 -#define SLEEP_TYPE_MASK 0xFFFFECFF +#define SLEEP_TYPE_MASK GENMASK(12, 10) #define SLEEP_TYPE_S5 0x1C00 -#define SLEEP_ENABLE 0x2000 +#define SLEEP_ENABLE BIT(13) extern int pmc_atom_read(int offset, u32 *value); -- cgit v1.2.3 From be599244865cb788abe2c6f59ccb05d7240448e4 Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 9 Aug 2022 19:36:33 +0200 Subject: cpumask: align signatures of UP implementations Between the generic version, and their uniprocessor optimised implementations, the return types of cpumask_any_and_distribute() and cpumask_any_distribute() are not identical. Change the UP versions to 'unsigned int', to match the generic versions. Suggested-by: Yury Norov Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- include/linux/cpumask.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 0d435d0edbcb..d8c2a40f8beb 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -202,12 +202,13 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) return 0; } -static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) { +static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) +{ return cpumask_first_and(src1p, src2p); } -static inline int cpumask_any_distribute(const struct cpumask *srcp) +static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } -- cgit v1.2.3 From 2248ccd80124e61c2c84a22b22409bab452e1f0c Mon Sep 17 00:00:00 2001 From: Sander Vanheule Date: Tue, 9 Aug 2022 19:36:34 +0200 Subject: lib/cpumask: add inline cpumask_next_wrap() for UP In the uniprocessor case, cpumask_next_wrap() can be simplified, as the number of valid argument combinations is limited: - 'start' can only be 0 - 'n' can only be -1 or 0 The only valid CPU that can then be returned, if any, will be the first one set in the provided 'mask'. For NR_CPUS == 1, include/linux/cpumask.h now provides an inline definition of cpumask_next_wrap(), which will conflict with the one provided by lib/cpumask.c. Make building of lib/cpumask.o again depend on CONFIG_SMP=y (i.e. NR_CPUS > 1) to avoid the re-definition. Suggested-by: Yury Norov Signed-off-by: Sander Vanheule Signed-off-by: Yury Norov --- include/linux/cpumask.h | 19 +++++++++++++++++++ lib/Makefile | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d8c2a40f8beb..bd047864c7ac 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -262,7 +262,26 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +#if NR_CPUS == 1 +static inline +unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + cpumask_check(start); + if (n != -1) + cpumask_check(n); + + /* + * Return the first available CPU when wrapping, or when starting before cpu0, + * since there is only one valid option. + */ + if (wrap && n >= 0) + return nr_cpumask_bits; + + return cpumask_first(mask); +} +#else unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); +#endif /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location diff --git a/lib/Makefile b/lib/Makefile index c95212141928..5927d7fa0806 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,9 +34,10 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o cpumask.o + buildid.o lib-$(CONFIG_PRINTK) += dump_stack.o +lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o -- cgit v1.2.3 From 76b079ef4cc954fc2c2e0333a01855b0b2b6bdee Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:09 +0800 Subject: sched/psi: Remove unused parameter nbytes of psi_trigger_create() psi_trigger_create()'s 'nbytes' parameter is not used, so we can remove it. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/psi.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/sched/psi.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/psi.h b/include/linux/psi.h index 89784763d19e..dd74411ac21d 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -27,7 +27,7 @@ void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res); + char *buf, enum psi_res res); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ffaccd6373f1..df7df5843b4f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3698,7 +3698,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, } psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; - new = psi_trigger_create(psi, buf, nbytes, res); + new = psi_trigger_create(psi, buf, res); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 5ee615a59fe1..ecb4b4ff4ce0 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1087,7 +1087,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) } struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res) + char *buf, enum psi_res res) { struct psi_trigger *t; enum psi_states state; @@ -1316,7 +1316,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, return -EBUSY; } - new = psi_trigger_create(&psi_system, buf, nbytes, res); + new = psi_trigger_create(&psi_system, buf, res); if (IS_ERR(new)) { mutex_unlock(&seq->lock); return PTR_ERR(new); -- cgit v1.2.3 From d7ae5818c3fa3007dee13f9d99832e7f26b8bc44 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Sat, 6 Aug 2022 20:05:10 +0800 Subject: sched/psi: Remove redundant cgroup_psi() when !CONFIG_CGROUPS cgroup_psi() is only called under CONFIG_CGROUPS. We don't need cgroup_psi() when !CONFIG_CGROUPS, so we can remove it in this case. Signed-off-by: Hao Jia Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed53bfe7c46c..ac5d0515680e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -734,11 +734,6 @@ static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) return NULL; } -static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) -{ - return NULL; -} - static inline bool cgroup_psi_enabled(void) { return false; -- cgit v1.2.3 From 484b9fa4886bd9377969aad5e9ea17efda4ecda6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 16 Aug 2022 01:36:31 -0400 Subject: virtio: Revert "virtio: add helper virtio_find_vqs_ctx_size()" This reverts commit fe3dc04e31aa51f91dc7f741a5f76cc4817eb5b4: the API is now unused and in fact can't be implemented on top of a legacy device. Fixes: fe3dc04e31aa ("virtio: add helper virtio_find_vqs_ctx_size()") Cc: "Xuan Zhuo" Signed-off-by: Michael S. Tsirkin Message-Id: <20220816053602.173815-3-mst@redhat.com> --- include/linux/virtio_config.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 6adff09f7170..888f7e96f0c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -241,18 +241,6 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ctx, desc); } -static inline -int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, struct irq_affinity *desc) -{ - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, - ctx, desc); -} - /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks * @vdev: the device -- cgit v1.2.3 From 9993a4f989c7ca5e227329b2878f65d05c9fc20f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 16 Aug 2022 01:36:58 -0400 Subject: virtio: Revert "virtio: find_vqs() add arg sizes" This reverts commit a10fba0377145fccefea4dc4dd5915b7ed87e546: the proposed API isn't supported on all transports but no effort was made to address this. It might not be hard to fix if we want to: maybe just rename size to size_hint and make sure legacy transports ignore the hint. But it's not sure what the benefit is in any case, so let's drop it. Fixes: a10fba037714 ("virtio: find_vqs() add arg sizes") Signed-off-by: Michael S. Tsirkin Message-Id: <20220816053602.173815-8-mst@redhat.com> --- arch/um/drivers/virtio_uml.c | 2 +- drivers/platform/mellanox/mlxbf-tmfifo.c | 1 - drivers/remoteproc/remoteproc_virtio.c | 1 - drivers/s390/virtio/virtio_ccw.c | 1 - drivers/virtio/virtio_mmio.c | 1 - drivers/virtio/virtio_pci_common.c | 2 +- drivers/virtio/virtio_pci_common.h | 2 +- drivers/virtio/virtio_pci_modern.c | 7 ++----- drivers/virtio/virtio_vdpa.c | 1 - include/linux/virtio_config.h | 14 +++++--------- 10 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 79e38afd4b91..e719af8bdf56 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1011,7 +1011,7 @@ error_kzalloc: static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 8be13d416f48..1ae3c56b66b0 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -928,7 +928,6 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 81c4f5776109..0f7706e23eb9 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -158,7 +158,6 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool * ctx, struct irq_affinity *desc) { diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 896896e32664..a10dbe632ef9 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -637,7 +637,6 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index dfcecfd7aba1..3ff746e3f24a 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -474,7 +474,6 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7ad734584823..ad258a9d3b9f 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -396,7 +396,7 @@ out_del_vqs: /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { int err; diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index a5ff838b85a5..23112d84218f 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -110,7 +110,7 @@ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], u32 sizes[], const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index be51ec849252..c3b9f2761849 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -347,15 +347,12 @@ err: static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx, - desc); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); if (rc) return rc; diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 832d2c5b1b19..9670cc79371d 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -269,7 +269,6 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], - u32 sizes[], const bool *ctx, struct irq_affinity *desc) { diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 888f7e96f0c7..36ec7be1f480 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -55,7 +55,6 @@ struct virtio_shm_region { * include a NULL entry for vqs that do not need a callback * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver - * sizes: array of virtqueue sizes * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) @@ -104,9 +103,7 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - u32 sizes[], - const bool *ctx, + const char * const names[], const bool *ctx, struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); @@ -215,7 +212,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, const char *names[] = { n }; struct virtqueue *vq; int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL, NULL); + NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -227,8 +224,7 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); } static inline @@ -237,8 +233,8 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, - ctx, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); } /** -- cgit v1.2.3 From 5c669c4a4c6aa0489848093c93b8029f5c5c75ec Mon Sep 17 00:00:00 2001 From: Ricardo Cañuelo Date: Wed, 10 Aug 2022 11:40:03 +0200 Subject: virtio: kerneldocs fixes and enhancements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix variable names in some kerneldocs, naming in others. Add kerneldocs for struct vring_desc and vring_interrupt. Signed-off-by: Ricardo Cañuelo Message-Id: <20220810094004.1250-2-ricardo.canuelo@collabora.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- drivers/virtio/virtio_ring.c | 8 ++++++++ include/linux/virtio.h | 6 +++--- include/linux/virtio_config.h | 6 +++--- include/uapi/linux/virtio_ring.h | 16 +++++++++++----- 4 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d66c8e6d0ef3..4620e9d79dde 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2426,6 +2426,14 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } +/** + * vring_interrupt - notify a virtqueue on an interrupt + * @irq: the IRQ number (ignored) + * @_vq: the struct virtqueue to notify + * + * Calls the callback function of @_vq to process the virtqueue + * notification. + */ irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index a3f73bb6733e..dcab9c7e8784 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -11,7 +11,7 @@ #include /** - * virtqueue - a queue to register buffers for sending or receiving. + * struct virtqueue - a queue to register buffers for sending or receiving. * @list: the chain of virtqueues for this device * @callback: the function to call when buffers are consumed (can be NULL). * @name: the name of this virtqueue (mainly for debugging) @@ -97,7 +97,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); /** - * virtio_device - representation of a device using virtio + * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) * @config_enabled: configuration change reporting enabled @@ -156,7 +156,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev); list_for_each_entry(vq, &vdev->vqs, list) /** - * virtio_driver - operations for a virtio I/O driver + * struct virtio_driver - operations for a virtio I/O driver * @driver: underlying device driver (populate name and owner). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 36ec7be1f480..4b517649cfe8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -239,7 +239,7 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks - * @vdev: the device + * @dev: the virtio device */ static inline void virtio_synchronize_cbs(struct virtio_device *dev) @@ -258,7 +258,7 @@ void virtio_synchronize_cbs(struct virtio_device *dev) /** * virtio_device_ready - enable vq use in probe function - * @vdev: the device + * @dev: the virtio device * * Driver must call this to use vqs in the probe function. * @@ -306,7 +306,7 @@ const char *virtio_bus_name(struct virtio_device *vdev) /** * virtqueue_set_affinity - setting affinity for a virtqueue * @vq: the virtqueue - * @cpu: the cpu no. + * @cpu_mask: the cpu mask * * Pay attention the function are best-effort: the affinity hint may not be set * due to config support, irq type and sharing. diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 476d3e5c0fe7..f8c20d3de8da 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -93,15 +93,21 @@ #define VRING_USED_ALIGN_SIZE 4 #define VRING_DESC_ALIGN_SIZE 16 -/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +/** + * struct vring_desc - Virtio ring descriptors, + * 16 bytes long. These can chain together via @next. + * + * @addr: buffer address (guest-physical) + * @len: buffer length + * @flags: descriptor flags + * @next: index of the next descriptor in the chain, + * if the VRING_DESC_F_NEXT flag is set. We chain unused + * descriptors via this, too. + */ struct vring_desc { - /* Address (guest-physical). */ __virtio64 addr; - /* Length. */ __virtio32 len; - /* The flags as indicated above. */ __virtio16 flags; - /* We chain unused descriptors via this, too */ __virtio16 next; }; -- cgit v1.2.3 From 415d832497098030241605c52ea83d4e2cfa7879 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 16 Aug 2022 16:03:11 +0900 Subject: locking/atomic: Make test_and_*_bit() ordered on failure These operations are documented as always ordered in include/asm-generic/bitops/instrumented-atomic.h, and producer-consumer type use cases where one side needs to ensure a flag is left pending after some shared data was updated rely on this ordering, even in the failure case. This is the case with the workqueue code, which currently suffers from a reproducible ordering violation on Apple M1 platforms (which are notoriously out-of-order) that ends up causing the TTY layer to fail to deliver data to userspace properly under the right conditions. This change fixes that bug. Change the documentation to restrict the "no order on failure" story to the _lock() variant (for which it makes sense), and remove the early-exit from the generic implementation, which is what causes the missing barrier semantics in that case. Without this, the remaining atomic op is fully ordered (including on ARM64 LSE, as of recent versions of the architecture spec). Suggested-by: Linus Torvalds Cc: stable@vger.kernel.org Fixes: e986a0d6cb36 ("locking/atomics, asm-generic/bitops/atomic.h: Rewrite using atomic_*() APIs") Fixes: 61e02392d3c7 ("locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit()") Signed-off-by: Hector Martin Acked-by: Will Deacon Reviewed-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- Documentation/atomic_bitops.txt | 2 +- include/asm-generic/bitops/atomic.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index 093cdaefdb37..d8b101c97031 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_{}_bit() operations, + otherwise the above rules apply. In the case of test_and_set_bit_lock(), if the bit in memory is unchanged by the operation then it is deemed to have failed. diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 3096f086b5a3..71ab4ba9c25d 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -39,9 +39,6 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - if (READ_ONCE(*p) & mask) - return 1; - old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); return !!(old & mask); } @@ -53,9 +50,6 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - if (!(READ_ONCE(*p) & mask)) - return 0; - old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); return !!(old & mask); } -- cgit v1.2.3 From 3024d95a4c521c278a7504ee9e80c57c3a9750e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Aug 2022 23:32:09 +0200 Subject: bpf: Partially revert flexible-array member replacement Partially revert 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") given it breaks BPF UAPI. For example, BPF CI run reveals build breakage under LLVM: [...] CLNG-BPF [test_maps] map_ptr_kern.o CLNG-BPF [test_maps] btf__core_reloc_arrays___diff_arr_val_sz.o CLNG-BPF [test_maps] test_bpf_cookie.o progs/map_ptr_kern.c:314:26: error: field 'trie_key' with variable sized type 'struct bpf_lpm_trie_key' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-variable-sized-type-not-at-end] struct bpf_lpm_trie_key trie_key; ^ CLNG-BPF [test_maps] btf__core_reloc_type_based___diff.o 1 error generated. make: *** [Makefile:521: /tmp/runner/work/bpf/bpf/tools/testing/selftests/bpf/map_ptr_kern.o] Error 1 make: *** Waiting for unfinished jobs.... [...] Typical usage of the bpf_lpm_trie_key is that the struct gets embedded into a user defined key for the LPM BPF map, from the selftest example: struct bpf_lpm_trie_key { <-- UAPI exported struct __u32 prefixlen; __u8 data[]; }; struct lpm_key { <-- BPF program defined struct struct bpf_lpm_trie_key trie_key; __u32 data; }; Undo this for BPF until a different solution can be found. It's the only flexible- array member case in the UAPI header. This was discovered in BPF CI after Dave reported that the include/uapi/linux/bpf.h header was out of sync with tools/include/uapi/linux/bpf.h after 94dfc73e7cf4. And the subsequent sync attempt failed CI. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Reported-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Link: https://lore.kernel.org/bpf/22aebc88-da67-f086-e620-dd4a16e2bc69@iogearbox.net --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..59a217ca2dfd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -79,7 +79,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { -- cgit v1.2.3 From fc4aaf9fb3c99bcb326d52f9d320ed5680bd1cee Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 16 Aug 2022 10:34:40 +0100 Subject: net: Fix suspicious RCU usage in bpf_sk_reuseport_detach() bpf_sk_reuseport_detach() calls __rcu_dereference_sk_user_data_with_flags() to obtain the value of sk->sk_user_data, but that function is only usable if the RCU read lock is held, and neither that function nor any of its callers hold it. Fix this by adding a new helper, __locked_read_sk_user_data_with_flags() that checks to see if sk->sk_callback_lock() is held and use that here instead. Alternatively, making __rcu_dereference_sk_user_data_with_flags() use rcu_dereference_checked() might suffice. Without this, the following warning can be occasionally observed: ============================= WARNING: suspicious RCU usage 6.0.0-rc1-build2+ #563 Not tainted ----------------------------- include/net/sock.h:592 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 5 locks held by locktest/29873: #0: ffff88812734b550 (&sb->s_type->i_mutex_key#9){+.+.}-{3:3}, at: __sock_release+0x77/0x121 #1: ffff88812f5621b0 (sk_lock-AF_INET){+.+.}-{0:0}, at: tcp_close+0x1c/0x70 #2: ffff88810312f5c8 (&h->lhash2[i].lock){+.+.}-{2:2}, at: inet_unhash+0x76/0x1c0 #3: ffffffff83768bb8 (reuseport_lock){+...}-{2:2}, at: reuseport_detach_sock+0x18/0xdd #4: ffff88812f562438 (clock-AF_INET){++..}-{2:2}, at: bpf_sk_reuseport_detach+0x24/0xa4 stack backtrace: CPU: 1 PID: 29873 Comm: locktest Not tainted 6.0.0-rc1-build2+ #563 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack_lvl+0x4c/0x5f bpf_sk_reuseport_detach+0x6d/0xa4 reuseport_detach_sock+0x75/0xdd inet_unhash+0xa5/0x1c0 tcp_set_state+0x169/0x20f ? lockdep_sock_is_held+0x3a/0x3a ? __lock_release.isra.0+0x13e/0x220 ? reacquire_held_locks+0x1bb/0x1bb ? hlock_class+0x31/0x96 ? mark_lock+0x9e/0x1af __tcp_close+0x50/0x4b6 tcp_close+0x28/0x70 inet_release+0x8e/0xa7 __sock_release+0x95/0x121 sock_close+0x14/0x17 __fput+0x20f/0x36a task_work_run+0xa3/0xcc exit_to_user_mode_prepare+0x9c/0x14d syscall_exit_to_user_mode+0x18/0x44 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: cf8c1e967224 ("net: refactor bpf_sk_reuseport_detach()") Signed-off-by: David Howells cc: Hawkins Jiawei Link: https://lore.kernel.org/r/166064248071.3502205.10036394558814861778.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/net/sock.h | 25 +++++++++++++++++++++++++ kernel/bpf/reuseport_array.c | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 05a1bbdf5805..d08cfe190a78 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -577,6 +577,31 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) +/** + * __locked_read_sk_user_data_with_flags - return the pointer + * only if argument flags all has been set in sk_user_data. Otherwise + * return NULL + * + * @sk: socket + * @flags: flag bits + * + * The caller must be holding sk->sk_callback_lock. + */ +static inline void * +__locked_read_sk_user_data_with_flags(const struct sock *sk, + uintptr_t flags) +{ + uintptr_t sk_user_data = + (uintptr_t)rcu_dereference_check(__sk_user_data(sk), + lockdep_is_held(&sk->sk_callback_lock)); + + WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); + + if ((sk_user_data & flags) == flags) + return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); + return NULL; +} + /** * __rcu_dereference_sk_user_data_with_flags - return the pointer * only if argument flags all has been set in sk_user_data. Otherwise diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 85fa9dbfa8bf..82c61612f382 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk) struct sock __rcu **socks; write_lock_bh(&sk->sk_callback_lock); - socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); + socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); if (socks) { WRITE_ONCE(sk->sk_user_data, NULL); /* -- cgit v1.2.3 From 5152de7b79ab0be150f5966481b0c8f996192531 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 16 Aug 2022 16:53:46 +0300 Subject: net: mscc: ocelot: fix incorrect ndo_get_stats64 packet counters Reading stats using the SYS_COUNT_* register definitions is only used by ocelot_get_stats64() from the ocelot switchdev driver, however, currently the bucket definitions are incorrect. Separately, on both RX and TX, we have the following problems: - a 256-1023 bucket which actually tracks the 256-511 packets - the 1024-1526 bucket actually tracks the 512-1023 packets - the 1527-max bucket actually tracks the 1024-1526 packets => nobody tracks the packets from the real 1527-max bucket Additionally, the RX_PAUSE, RX_CONTROL, RX_LONGS and RX_CLASSIFIED_DROPS all track the wrong thing. However this doesn't seem to have any consequence, since ocelot_get_stats64() doesn't use these. Even though this problem only manifests itself for the switchdev driver, we cannot split the fix for ocelot and for DSA, since it requires fixing the bucket definitions from enum ocelot_reg, which makes us necessarily adapt the structures from felix and seville as well. Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch") Fixes: 56051948773e ("net: dsa: ocelot: add driver for Felix switch family") Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 20 ++++++++++++-------- drivers/net/dsa/ocelot/seville_vsc9953.c | 16 +++++++++------- drivers/net/ethernet/mscc/ocelot_net.c | 6 ++++-- drivers/net/ethernet/mscc/vsc7514_regs.c | 24 +++++++++++++----------- include/soc/mscc/ocelot.h | 6 ++++-- 5 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 5859ef3b242c..e1ebe21cad00 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -281,19 +281,23 @@ static const u32 vsc9959_sys_regmap[] = { REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), - REG(SYS_COUNT_RX_LONGS, 0x000044), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), + REG(SYS_COUNT_RX_LONGS, 0x000048), REG(SYS_COUNT_TX_OCTETS, 0x000200), REG(SYS_COUNT_TX_COLLISION, 0x000210), REG(SYS_COUNT_TX_DROPS, 0x000214), REG(SYS_COUNT_TX_64, 0x00021c), REG(SYS_COUNT_TX_65_127, 0x000220), - REG(SYS_COUNT_TX_128_511, 0x000224), - REG(SYS_COUNT_TX_512_1023, 0x000228), - REG(SYS_COUNT_TX_1024_1526, 0x00022c), - REG(SYS_COUNT_TX_1527_MAX, 0x000230), + REG(SYS_COUNT_TX_128_255, 0x000224), + REG(SYS_COUNT_TX_256_511, 0x000228), + REG(SYS_COUNT_TX_512_1023, 0x00022c), + REG(SYS_COUNT_TX_1024_1526, 0x000230), + REG(SYS_COUNT_TX_1527_MAX, 0x000234), REG(SYS_COUNT_TX_AGING, 0x000278), REG(SYS_RESET_CFG, 0x000e00), REG(SYS_SR_ETYPE_CFG, 0x000e04), diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index ea0649211356..ebe9ddbbe2b7 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -277,19 +277,21 @@ static const u32 vsc9953_sys_regmap[] = { REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), REG(SYS_COUNT_RX_LONGS, 0x000048), REG(SYS_COUNT_TX_OCTETS, 0x000100), REG(SYS_COUNT_TX_COLLISION, 0x000110), REG(SYS_COUNT_TX_DROPS, 0x000114), REG(SYS_COUNT_TX_64, 0x00011c), REG(SYS_COUNT_TX_65_127, 0x000120), - REG(SYS_COUNT_TX_128_511, 0x000124), - REG(SYS_COUNT_TX_512_1023, 0x000128), - REG(SYS_COUNT_TX_1024_1526, 0x00012c), - REG(SYS_COUNT_TX_1527_MAX, 0x000130), + REG(SYS_COUNT_TX_128_255, 0x000124), + REG(SYS_COUNT_TX_256_511, 0x000128), + REG(SYS_COUNT_TX_512_1023, 0x00012c), + REG(SYS_COUNT_TX_1024_1526, 0x000130), + REG(SYS_COUNT_TX_1527_MAX, 0x000134), REG(SYS_COUNT_TX_AGING, 0x000178), REG(SYS_RESET_CFG, 0x000318), REG_RESERVED(SYS_SR_ETYPE_CFG), diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 5e6136e80282..9d8cea16245e 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -739,7 +739,8 @@ static void ocelot_get_stats64(struct net_device *dev, ocelot_read(ocelot, SYS_COUNT_RX_64) + ocelot_read(ocelot, SYS_COUNT_RX_65_127) + ocelot_read(ocelot, SYS_COUNT_RX_128_255) + - ocelot_read(ocelot, SYS_COUNT_RX_256_1023) + + ocelot_read(ocelot, SYS_COUNT_RX_256_511) + + ocelot_read(ocelot, SYS_COUNT_RX_512_1023) + ocelot_read(ocelot, SYS_COUNT_RX_1024_1526) + ocelot_read(ocelot, SYS_COUNT_RX_1527_MAX); stats->multicast = ocelot_read(ocelot, SYS_COUNT_RX_MULTICAST); @@ -749,7 +750,8 @@ static void ocelot_get_stats64(struct net_device *dev, stats->tx_bytes = ocelot_read(ocelot, SYS_COUNT_TX_OCTETS); stats->tx_packets = ocelot_read(ocelot, SYS_COUNT_TX_64) + ocelot_read(ocelot, SYS_COUNT_TX_65_127) + - ocelot_read(ocelot, SYS_COUNT_TX_128_511) + + ocelot_read(ocelot, SYS_COUNT_TX_128_255) + + ocelot_read(ocelot, SYS_COUNT_TX_256_511) + ocelot_read(ocelot, SYS_COUNT_TX_512_1023) + ocelot_read(ocelot, SYS_COUNT_TX_1024_1526) + ocelot_read(ocelot, SYS_COUNT_TX_1527_MAX); diff --git a/drivers/net/ethernet/mscc/vsc7514_regs.c b/drivers/net/ethernet/mscc/vsc7514_regs.c index c2af4eb8ca5d..38ab20b48cd4 100644 --- a/drivers/net/ethernet/mscc/vsc7514_regs.c +++ b/drivers/net/ethernet/mscc/vsc7514_regs.c @@ -180,13 +180,14 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), - REG(SYS_COUNT_RX_256_1023, 0x000030), - REG(SYS_COUNT_RX_1024_1526, 0x000034), - REG(SYS_COUNT_RX_1527_MAX, 0x000038), - REG(SYS_COUNT_RX_PAUSE, 0x00003c), - REG(SYS_COUNT_RX_CONTROL, 0x000040), - REG(SYS_COUNT_RX_LONGS, 0x000044), - REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x000048), + REG(SYS_COUNT_RX_256_511, 0x000030), + REG(SYS_COUNT_RX_512_1023, 0x000034), + REG(SYS_COUNT_RX_1024_1526, 0x000038), + REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), + REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), REG(SYS_COUNT_TX_OCTETS, 0x000100), REG(SYS_COUNT_TX_UNICAST, 0x000104), REG(SYS_COUNT_TX_MULTICAST, 0x000108), @@ -196,10 +197,11 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_TX_PAUSE, 0x000118), REG(SYS_COUNT_TX_64, 0x00011c), REG(SYS_COUNT_TX_65_127, 0x000120), - REG(SYS_COUNT_TX_128_511, 0x000124), - REG(SYS_COUNT_TX_512_1023, 0x000128), - REG(SYS_COUNT_TX_1024_1526, 0x00012c), - REG(SYS_COUNT_TX_1527_MAX, 0x000130), + REG(SYS_COUNT_TX_128_255, 0x000124), + REG(SYS_COUNT_TX_256_511, 0x000128), + REG(SYS_COUNT_TX_512_1023, 0x00012c), + REG(SYS_COUNT_TX_1024_1526, 0x000130), + REG(SYS_COUNT_TX_1527_MAX, 0x000134), REG(SYS_COUNT_TX_AGING, 0x000170), REG(SYS_RESET_CFG, 0x000508), REG(SYS_CMID, 0x00050c), diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ac151ecc7f19..e7e5b06deb2d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -335,7 +335,8 @@ enum ocelot_reg { SYS_COUNT_RX_64, SYS_COUNT_RX_65_127, SYS_COUNT_RX_128_255, - SYS_COUNT_RX_256_1023, + SYS_COUNT_RX_256_511, + SYS_COUNT_RX_512_1023, SYS_COUNT_RX_1024_1526, SYS_COUNT_RX_1527_MAX, SYS_COUNT_RX_PAUSE, @@ -351,7 +352,8 @@ enum ocelot_reg { SYS_COUNT_TX_PAUSE, SYS_COUNT_TX_64, SYS_COUNT_TX_65_127, - SYS_COUNT_TX_128_511, + SYS_COUNT_TX_128_255, + SYS_COUNT_TX_256_511, SYS_COUNT_TX_512_1023, SYS_COUNT_TX_1024_1526, SYS_COUNT_TX_1527_MAX, -- cgit v1.2.3 From 22d842e3efe56402c33b5e6e303bb71ce9bf9334 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 16 Aug 2022 16:53:48 +0300 Subject: net: mscc: ocelot: turn stats_lock into a spinlock ocelot_get_stats64() currently runs unlocked and therefore may collide with ocelot_port_update_stats() which indirectly accesses the same counters. However, ocelot_get_stats64() runs in atomic context, and we cannot simply take the sleepable ocelot->stats_lock mutex. We need to convert it to an atomic spinlock first. Do that as a preparatory change. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 ++-- drivers/net/ethernet/mscc/ocelot.c | 11 +++++------ include/soc/mscc/ocelot.h | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index e1ebe21cad00..46fd6cd0d8f3 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -2171,7 +2171,7 @@ static void vsc9959_psfp_sgi_table_del(struct ocelot *ocelot, static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, struct felix_stream_filter_counters *counters) { - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); ocelot_rmw(ocelot, SYS_STAT_CFG_STAT_VIEW(index), SYS_STAT_CFG_STAT_VIEW_M, @@ -2188,7 +2188,7 @@ static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, SYS_STAT_CFG_STAT_CLEAR_SHOT(0x10), SYS_STAT_CFG); - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); } static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index d4649e4ee0e7..c67f162f8ab5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1906,13 +1906,13 @@ static void ocelot_check_stats_work(struct work_struct *work) stats_work); int i, err; - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); for (i = 0; i < ocelot->num_phys_ports; i++) { err = ocelot_port_update_stats(ocelot, i); if (err) break; } - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); if (err) dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); @@ -1925,7 +1925,7 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { int i, err; - mutex_lock(&ocelot->stats_lock); + spin_lock(&ocelot->stats_lock); /* check and update now */ err = ocelot_port_update_stats(ocelot, port); @@ -1934,7 +1934,7 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) for (i = 0; i < ocelot->num_stats; i++) *data++ = ocelot->stats[port * ocelot->num_stats + i]; - mutex_unlock(&ocelot->stats_lock); + spin_unlock(&ocelot->stats_lock); if (err) dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); @@ -3363,7 +3363,7 @@ int ocelot_init(struct ocelot *ocelot) if (!ocelot->stats) return -ENOMEM; - mutex_init(&ocelot->stats_lock); + spin_lock_init(&ocelot->stats_lock); mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); @@ -3511,7 +3511,6 @@ void ocelot_deinit(struct ocelot *ocelot) cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); destroy_workqueue(ocelot->owq); - mutex_destroy(&ocelot->stats_lock); } EXPORT_SYMBOL(ocelot_deinit); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index e7e5b06deb2d..72b9474391da 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -752,7 +752,7 @@ struct ocelot { struct ocelot_psfp_list psfp; /* Workqueue to check statistics for overflow with its lock */ - struct mutex stats_lock; + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; -- cgit v1.2.3 From 9190460084ddd0e9235f55eab0fdd5456b5f2fd5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 16 Aug 2022 16:53:50 +0300 Subject: net: mscc: ocelot: make struct ocelot_stat_layout array indexable The ocelot counters are 32-bit and require periodic reading, every 2 seconds, by ocelot_port_update_stats(), so that wraparounds are detected. Currently, the counters reported by ocelot_get_stats64() come from the 32-bit hardware counters directly, rather than from the 64-bit accumulated ocelot->stats, and this is a problem for their integrity. The strategy is to make ocelot_get_stats64() able to cherry-pick individual stats from ocelot->stats the way in which it currently reads them out from SYS_COUNT_* registers. But currently it can't, because ocelot->stats is an opaque u64 array that's used only to feed data into ethtool -S. To solve that problem, we need to make ocelot->stats indexable, and associate each element with an element of struct ocelot_stat_layout used by ethtool -S. This makes ocelot_stat_layout a fat (and possibly sparse) array, so we need to change the way in which we access it. We no longer need OCELOT_STAT_END as a sentinel, because we know the array's size (OCELOT_NUM_STATS). We just need to skip the array elements that were left unpopulated for the switch revision (ocelot, felix, seville). Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 468 +++++++++++++++++++++++------ drivers/net/dsa/ocelot/seville_vsc9953.c | 468 +++++++++++++++++++++++------ drivers/net/ethernet/mscc/ocelot.c | 40 ++- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 468 +++++++++++++++++++++++------ include/soc/mscc/ocelot.h | 105 ++++++- 5 files changed, 1243 insertions(+), 306 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 46fd6cd0d8f3..c9f270f24b1c 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -551,101 +551,379 @@ static const struct reg_field vsc9959_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 7, 4), }; -static const struct ocelot_stat_layout vsc9959_stats_layout[] = { - { .offset = 0x00, .name = "rx_octets", }, - { .offset = 0x01, .name = "rx_unicast", }, - { .offset = 0x02, .name = "rx_multicast", }, - { .offset = 0x03, .name = "rx_broadcast", }, - { .offset = 0x04, .name = "rx_shorts", }, - { .offset = 0x05, .name = "rx_fragments", }, - { .offset = 0x06, .name = "rx_jabbers", }, - { .offset = 0x07, .name = "rx_crc_align_errs", }, - { .offset = 0x08, .name = "rx_sym_errs", }, - { .offset = 0x09, .name = "rx_frames_below_65_octets", }, - { .offset = 0x0A, .name = "rx_frames_65_to_127_octets", }, - { .offset = 0x0B, .name = "rx_frames_128_to_255_octets", }, - { .offset = 0x0C, .name = "rx_frames_256_to_511_octets", }, - { .offset = 0x0D, .name = "rx_frames_512_to_1023_octets", }, - { .offset = 0x0E, .name = "rx_frames_1024_to_1526_octets", }, - { .offset = 0x0F, .name = "rx_frames_over_1526_octets", }, - { .offset = 0x10, .name = "rx_pause", }, - { .offset = 0x11, .name = "rx_control", }, - { .offset = 0x12, .name = "rx_longs", }, - { .offset = 0x13, .name = "rx_classified_drops", }, - { .offset = 0x14, .name = "rx_red_prio_0", }, - { .offset = 0x15, .name = "rx_red_prio_1", }, - { .offset = 0x16, .name = "rx_red_prio_2", }, - { .offset = 0x17, .name = "rx_red_prio_3", }, - { .offset = 0x18, .name = "rx_red_prio_4", }, - { .offset = 0x19, .name = "rx_red_prio_5", }, - { .offset = 0x1A, .name = "rx_red_prio_6", }, - { .offset = 0x1B, .name = "rx_red_prio_7", }, - { .offset = 0x1C, .name = "rx_yellow_prio_0", }, - { .offset = 0x1D, .name = "rx_yellow_prio_1", }, - { .offset = 0x1E, .name = "rx_yellow_prio_2", }, - { .offset = 0x1F, .name = "rx_yellow_prio_3", }, - { .offset = 0x20, .name = "rx_yellow_prio_4", }, - { .offset = 0x21, .name = "rx_yellow_prio_5", }, - { .offset = 0x22, .name = "rx_yellow_prio_6", }, - { .offset = 0x23, .name = "rx_yellow_prio_7", }, - { .offset = 0x24, .name = "rx_green_prio_0", }, - { .offset = 0x25, .name = "rx_green_prio_1", }, - { .offset = 0x26, .name = "rx_green_prio_2", }, - { .offset = 0x27, .name = "rx_green_prio_3", }, - { .offset = 0x28, .name = "rx_green_prio_4", }, - { .offset = 0x29, .name = "rx_green_prio_5", }, - { .offset = 0x2A, .name = "rx_green_prio_6", }, - { .offset = 0x2B, .name = "rx_green_prio_7", }, - { .offset = 0x80, .name = "tx_octets", }, - { .offset = 0x81, .name = "tx_unicast", }, - { .offset = 0x82, .name = "tx_multicast", }, - { .offset = 0x83, .name = "tx_broadcast", }, - { .offset = 0x84, .name = "tx_collision", }, - { .offset = 0x85, .name = "tx_drops", }, - { .offset = 0x86, .name = "tx_pause", }, - { .offset = 0x87, .name = "tx_frames_below_65_octets", }, - { .offset = 0x88, .name = "tx_frames_65_to_127_octets", }, - { .offset = 0x89, .name = "tx_frames_128_255_octets", }, - { .offset = 0x8A, .name = "tx_frames_256_511_octets", }, - { .offset = 0x8B, .name = "tx_frames_512_1023_octets", }, - { .offset = 0x8C, .name = "tx_frames_1024_1526_octets", }, - { .offset = 0x8D, .name = "tx_frames_over_1526_octets", }, - { .offset = 0x8E, .name = "tx_yellow_prio_0", }, - { .offset = 0x8F, .name = "tx_yellow_prio_1", }, - { .offset = 0x90, .name = "tx_yellow_prio_2", }, - { .offset = 0x91, .name = "tx_yellow_prio_3", }, - { .offset = 0x92, .name = "tx_yellow_prio_4", }, - { .offset = 0x93, .name = "tx_yellow_prio_5", }, - { .offset = 0x94, .name = "tx_yellow_prio_6", }, - { .offset = 0x95, .name = "tx_yellow_prio_7", }, - { .offset = 0x96, .name = "tx_green_prio_0", }, - { .offset = 0x97, .name = "tx_green_prio_1", }, - { .offset = 0x98, .name = "tx_green_prio_2", }, - { .offset = 0x99, .name = "tx_green_prio_3", }, - { .offset = 0x9A, .name = "tx_green_prio_4", }, - { .offset = 0x9B, .name = "tx_green_prio_5", }, - { .offset = 0x9C, .name = "tx_green_prio_6", }, - { .offset = 0x9D, .name = "tx_green_prio_7", }, - { .offset = 0x9E, .name = "tx_aged", }, - { .offset = 0x100, .name = "drop_local", }, - { .offset = 0x101, .name = "drop_tail", }, - { .offset = 0x102, .name = "drop_yellow_prio_0", }, - { .offset = 0x103, .name = "drop_yellow_prio_1", }, - { .offset = 0x104, .name = "drop_yellow_prio_2", }, - { .offset = 0x105, .name = "drop_yellow_prio_3", }, - { .offset = 0x106, .name = "drop_yellow_prio_4", }, - { .offset = 0x107, .name = "drop_yellow_prio_5", }, - { .offset = 0x108, .name = "drop_yellow_prio_6", }, - { .offset = 0x109, .name = "drop_yellow_prio_7", }, - { .offset = 0x10A, .name = "drop_green_prio_0", }, - { .offset = 0x10B, .name = "drop_green_prio_1", }, - { .offset = 0x10C, .name = "drop_green_prio_2", }, - { .offset = 0x10D, .name = "drop_green_prio_3", }, - { .offset = 0x10E, .name = "drop_green_prio_4", }, - { .offset = 0x10F, .name = "drop_green_prio_5", }, - { .offset = 0x110, .name = "drop_green_prio_6", }, - { .offset = 0x111, .name = "drop_green_prio_7", }, - OCELOT_STAT_END +static const struct ocelot_stat_layout vsc9959_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .offset = 0x00, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .offset = 0x01, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .offset = 0x02, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .offset = 0x03, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .offset = 0x04, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .offset = 0x05, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .offset = 0x06, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .offset = 0x07, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .offset = 0x08, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .offset = 0x09, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .offset = 0x0A, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .offset = 0x0B, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .offset = 0x0C, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .offset = 0x0D, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .offset = 0x0E, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .offset = 0x0F, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .offset = 0x10, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .offset = 0x11, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .offset = 0x12, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .offset = 0x13, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .offset = 0x14, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .offset = 0x15, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .offset = 0x16, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .offset = 0x17, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .offset = 0x18, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .offset = 0x19, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .offset = 0x1A, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .offset = 0x1B, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .offset = 0x1C, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .offset = 0x1D, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .offset = 0x1E, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .offset = 0x1F, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .offset = 0x20, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .offset = 0x21, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .offset = 0x22, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .offset = 0x23, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .offset = 0x24, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .offset = 0x25, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .offset = 0x26, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .offset = 0x27, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .offset = 0x28, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .offset = 0x29, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .offset = 0x2A, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .offset = 0x2B, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .offset = 0x80, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .offset = 0x81, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .offset = 0x82, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .offset = 0x83, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .offset = 0x84, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .offset = 0x85, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .offset = 0x86, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .offset = 0x87, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .offset = 0x88, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .offset = 0x89, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .offset = 0x8A, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .offset = 0x8B, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .offset = 0x8C, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .offset = 0x8D, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .offset = 0x8E, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .offset = 0x8F, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .offset = 0x90, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .offset = 0x91, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .offset = 0x92, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .offset = 0x93, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .offset = 0x94, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .offset = 0x95, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .offset = 0x96, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .offset = 0x97, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .offset = 0x98, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .offset = 0x99, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .offset = 0x9A, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .offset = 0x9B, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .offset = 0x9C, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .offset = 0x9D, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .offset = 0x9E, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .offset = 0x100, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .offset = 0x101, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .offset = 0x102, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .offset = 0x103, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .offset = 0x104, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .offset = 0x105, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .offset = 0x106, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .offset = 0x107, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .offset = 0x108, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .offset = 0x109, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .offset = 0x10A, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .offset = 0x10B, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .offset = 0x10C, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .offset = 0x10D, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .offset = 0x10E, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .offset = 0x10F, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .offset = 0x110, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .offset = 0x111, + }, }; static const struct vcap_field vsc9959_vcap_es0_keys[] = { diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index ebe9ddbbe2b7..fe5d4642d0bc 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -545,101 +545,379 @@ static const struct reg_field vsc9953_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 11, 4), }; -static const struct ocelot_stat_layout vsc9953_stats_layout[] = { - { .offset = 0x00, .name = "rx_octets", }, - { .offset = 0x01, .name = "rx_unicast", }, - { .offset = 0x02, .name = "rx_multicast", }, - { .offset = 0x03, .name = "rx_broadcast", }, - { .offset = 0x04, .name = "rx_shorts", }, - { .offset = 0x05, .name = "rx_fragments", }, - { .offset = 0x06, .name = "rx_jabbers", }, - { .offset = 0x07, .name = "rx_crc_align_errs", }, - { .offset = 0x08, .name = "rx_sym_errs", }, - { .offset = 0x09, .name = "rx_frames_below_65_octets", }, - { .offset = 0x0A, .name = "rx_frames_65_to_127_octets", }, - { .offset = 0x0B, .name = "rx_frames_128_to_255_octets", }, - { .offset = 0x0C, .name = "rx_frames_256_to_511_octets", }, - { .offset = 0x0D, .name = "rx_frames_512_to_1023_octets", }, - { .offset = 0x0E, .name = "rx_frames_1024_to_1526_octets", }, - { .offset = 0x0F, .name = "rx_frames_over_1526_octets", }, - { .offset = 0x10, .name = "rx_pause", }, - { .offset = 0x11, .name = "rx_control", }, - { .offset = 0x12, .name = "rx_longs", }, - { .offset = 0x13, .name = "rx_classified_drops", }, - { .offset = 0x14, .name = "rx_red_prio_0", }, - { .offset = 0x15, .name = "rx_red_prio_1", }, - { .offset = 0x16, .name = "rx_red_prio_2", }, - { .offset = 0x17, .name = "rx_red_prio_3", }, - { .offset = 0x18, .name = "rx_red_prio_4", }, - { .offset = 0x19, .name = "rx_red_prio_5", }, - { .offset = 0x1A, .name = "rx_red_prio_6", }, - { .offset = 0x1B, .name = "rx_red_prio_7", }, - { .offset = 0x1C, .name = "rx_yellow_prio_0", }, - { .offset = 0x1D, .name = "rx_yellow_prio_1", }, - { .offset = 0x1E, .name = "rx_yellow_prio_2", }, - { .offset = 0x1F, .name = "rx_yellow_prio_3", }, - { .offset = 0x20, .name = "rx_yellow_prio_4", }, - { .offset = 0x21, .name = "rx_yellow_prio_5", }, - { .offset = 0x22, .name = "rx_yellow_prio_6", }, - { .offset = 0x23, .name = "rx_yellow_prio_7", }, - { .offset = 0x24, .name = "rx_green_prio_0", }, - { .offset = 0x25, .name = "rx_green_prio_1", }, - { .offset = 0x26, .name = "rx_green_prio_2", }, - { .offset = 0x27, .name = "rx_green_prio_3", }, - { .offset = 0x28, .name = "rx_green_prio_4", }, - { .offset = 0x29, .name = "rx_green_prio_5", }, - { .offset = 0x2A, .name = "rx_green_prio_6", }, - { .offset = 0x2B, .name = "rx_green_prio_7", }, - { .offset = 0x40, .name = "tx_octets", }, - { .offset = 0x41, .name = "tx_unicast", }, - { .offset = 0x42, .name = "tx_multicast", }, - { .offset = 0x43, .name = "tx_broadcast", }, - { .offset = 0x44, .name = "tx_collision", }, - { .offset = 0x45, .name = "tx_drops", }, - { .offset = 0x46, .name = "tx_pause", }, - { .offset = 0x47, .name = "tx_frames_below_65_octets", }, - { .offset = 0x48, .name = "tx_frames_65_to_127_octets", }, - { .offset = 0x49, .name = "tx_frames_128_255_octets", }, - { .offset = 0x4A, .name = "tx_frames_256_511_octets", }, - { .offset = 0x4B, .name = "tx_frames_512_1023_octets", }, - { .offset = 0x4C, .name = "tx_frames_1024_1526_octets", }, - { .offset = 0x4D, .name = "tx_frames_over_1526_octets", }, - { .offset = 0x4E, .name = "tx_yellow_prio_0", }, - { .offset = 0x4F, .name = "tx_yellow_prio_1", }, - { .offset = 0x50, .name = "tx_yellow_prio_2", }, - { .offset = 0x51, .name = "tx_yellow_prio_3", }, - { .offset = 0x52, .name = "tx_yellow_prio_4", }, - { .offset = 0x53, .name = "tx_yellow_prio_5", }, - { .offset = 0x54, .name = "tx_yellow_prio_6", }, - { .offset = 0x55, .name = "tx_yellow_prio_7", }, - { .offset = 0x56, .name = "tx_green_prio_0", }, - { .offset = 0x57, .name = "tx_green_prio_1", }, - { .offset = 0x58, .name = "tx_green_prio_2", }, - { .offset = 0x59, .name = "tx_green_prio_3", }, - { .offset = 0x5A, .name = "tx_green_prio_4", }, - { .offset = 0x5B, .name = "tx_green_prio_5", }, - { .offset = 0x5C, .name = "tx_green_prio_6", }, - { .offset = 0x5D, .name = "tx_green_prio_7", }, - { .offset = 0x5E, .name = "tx_aged", }, - { .offset = 0x80, .name = "drop_local", }, - { .offset = 0x81, .name = "drop_tail", }, - { .offset = 0x82, .name = "drop_yellow_prio_0", }, - { .offset = 0x83, .name = "drop_yellow_prio_1", }, - { .offset = 0x84, .name = "drop_yellow_prio_2", }, - { .offset = 0x85, .name = "drop_yellow_prio_3", }, - { .offset = 0x86, .name = "drop_yellow_prio_4", }, - { .offset = 0x87, .name = "drop_yellow_prio_5", }, - { .offset = 0x88, .name = "drop_yellow_prio_6", }, - { .offset = 0x89, .name = "drop_yellow_prio_7", }, - { .offset = 0x8A, .name = "drop_green_prio_0", }, - { .offset = 0x8B, .name = "drop_green_prio_1", }, - { .offset = 0x8C, .name = "drop_green_prio_2", }, - { .offset = 0x8D, .name = "drop_green_prio_3", }, - { .offset = 0x8E, .name = "drop_green_prio_4", }, - { .offset = 0x8F, .name = "drop_green_prio_5", }, - { .offset = 0x90, .name = "drop_green_prio_6", }, - { .offset = 0x91, .name = "drop_green_prio_7", }, - OCELOT_STAT_END +static const struct ocelot_stat_layout vsc9953_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .offset = 0x00, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .offset = 0x01, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .offset = 0x02, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .offset = 0x03, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .offset = 0x04, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .offset = 0x05, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .offset = 0x06, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .offset = 0x07, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .offset = 0x08, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .offset = 0x09, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .offset = 0x0A, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .offset = 0x0B, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .offset = 0x0C, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .offset = 0x0D, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .offset = 0x0E, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .offset = 0x0F, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .offset = 0x10, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .offset = 0x11, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .offset = 0x12, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .offset = 0x13, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .offset = 0x14, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .offset = 0x15, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .offset = 0x16, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .offset = 0x17, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .offset = 0x18, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .offset = 0x19, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .offset = 0x1A, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .offset = 0x1B, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .offset = 0x1C, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .offset = 0x1D, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .offset = 0x1E, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .offset = 0x1F, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .offset = 0x20, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .offset = 0x21, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .offset = 0x22, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .offset = 0x23, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .offset = 0x24, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .offset = 0x25, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .offset = 0x26, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .offset = 0x27, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .offset = 0x28, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .offset = 0x29, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .offset = 0x2A, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .offset = 0x2B, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .offset = 0x40, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .offset = 0x41, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .offset = 0x42, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .offset = 0x43, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .offset = 0x44, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .offset = 0x45, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .offset = 0x46, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .offset = 0x47, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .offset = 0x48, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .offset = 0x49, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .offset = 0x4A, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .offset = 0x4B, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .offset = 0x4C, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .offset = 0x4D, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .offset = 0x4E, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .offset = 0x4F, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .offset = 0x50, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .offset = 0x51, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .offset = 0x52, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .offset = 0x53, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .offset = 0x54, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .offset = 0x55, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .offset = 0x56, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .offset = 0x57, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .offset = 0x58, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .offset = 0x59, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .offset = 0x5A, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .offset = 0x5B, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .offset = 0x5C, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .offset = 0x5D, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .offset = 0x5E, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .offset = 0x80, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .offset = 0x81, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .offset = 0x82, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .offset = 0x83, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .offset = 0x84, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .offset = 0x85, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .offset = 0x86, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .offset = 0x87, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .offset = 0x88, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .offset = 0x89, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .offset = 0x8A, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .offset = 0x8B, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .offset = 0x8C, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .offset = 0x8D, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .offset = 0x8E, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .offset = 0x8F, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .offset = 0x90, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .offset = 0x91, + }, }; static const struct vcap_field vsc9953_vcap_es0_keys[] = { diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c67f162f8ab5..68991b021c56 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1860,16 +1860,20 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) if (sset != ETH_SS_STATS) return; - for (i = 0; i < ocelot->num_stats; i++) + for (i = 0; i < OCELOT_NUM_STATS; i++) { + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name, ETH_GSTRING_LEN); + } } EXPORT_SYMBOL(ocelot_get_strings); /* Caller must hold &ocelot->stats_lock */ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) { - unsigned int idx = port * ocelot->num_stats; + unsigned int idx = port * OCELOT_NUM_STATS; struct ocelot_stats_region *region; int err, j; @@ -1930,9 +1934,15 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) /* check and update now */ err = ocelot_port_update_stats(ocelot, port); - /* Copy all counters */ - for (i = 0; i < ocelot->num_stats; i++) - *data++ = ocelot->stats[port * ocelot->num_stats + i]; + /* Copy all supported counters */ + for (i = 0; i < OCELOT_NUM_STATS; i++) { + int index = port * OCELOT_NUM_STATS + i; + + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + + *data++ = ocelot->stats[index]; + } spin_unlock(&ocelot->stats_lock); @@ -1943,10 +1953,16 @@ EXPORT_SYMBOL(ocelot_get_ethtool_stats); int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) { + int i, num_stats = 0; + if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return ocelot->num_stats; + for (i = 0; i < OCELOT_NUM_STATS; i++) + if (ocelot->stats_layout[i].name[0] != '\0') + num_stats++; + + return num_stats; } EXPORT_SYMBOL(ocelot_get_sset_count); @@ -1958,7 +1974,10 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->stats_regions); - for (i = 0; i < ocelot->num_stats; i++) { + for (i = 0; i < OCELOT_NUM_STATS; i++) { + if (ocelot->stats_layout[i].name[0] == '\0') + continue; + if (region && ocelot->stats_layout[i].offset == last + 1) { region->count++; } else { @@ -3340,7 +3359,6 @@ static void ocelot_detect_features(struct ocelot *ocelot) int ocelot_init(struct ocelot *ocelot) { - const struct ocelot_stat_layout *stat; char queue_name[32]; int i, ret; u32 port; @@ -3353,12 +3371,8 @@ int ocelot_init(struct ocelot *ocelot) } } - ocelot->num_stats = 0; - for_each_stat(ocelot, stat) - ocelot->num_stats++; - ocelot->stats = devm_kcalloc(ocelot->dev, - ocelot->num_phys_ports * ocelot->num_stats, + ocelot->num_phys_ports * OCELOT_NUM_STATS, sizeof(u64), GFP_KERNEL); if (!ocelot->stats) return -ENOMEM; diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 961f803aca19..9ff910560043 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -96,101 +96,379 @@ static const struct reg_field ocelot_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 12, 4), }; -static const struct ocelot_stat_layout ocelot_stats_layout[] = { - { .name = "rx_octets", .offset = 0x00, }, - { .name = "rx_unicast", .offset = 0x01, }, - { .name = "rx_multicast", .offset = 0x02, }, - { .name = "rx_broadcast", .offset = 0x03, }, - { .name = "rx_shorts", .offset = 0x04, }, - { .name = "rx_fragments", .offset = 0x05, }, - { .name = "rx_jabbers", .offset = 0x06, }, - { .name = "rx_crc_align_errs", .offset = 0x07, }, - { .name = "rx_sym_errs", .offset = 0x08, }, - { .name = "rx_frames_below_65_octets", .offset = 0x09, }, - { .name = "rx_frames_65_to_127_octets", .offset = 0x0A, }, - { .name = "rx_frames_128_to_255_octets", .offset = 0x0B, }, - { .name = "rx_frames_256_to_511_octets", .offset = 0x0C, }, - { .name = "rx_frames_512_to_1023_octets", .offset = 0x0D, }, - { .name = "rx_frames_1024_to_1526_octets", .offset = 0x0E, }, - { .name = "rx_frames_over_1526_octets", .offset = 0x0F, }, - { .name = "rx_pause", .offset = 0x10, }, - { .name = "rx_control", .offset = 0x11, }, - { .name = "rx_longs", .offset = 0x12, }, - { .name = "rx_classified_drops", .offset = 0x13, }, - { .name = "rx_red_prio_0", .offset = 0x14, }, - { .name = "rx_red_prio_1", .offset = 0x15, }, - { .name = "rx_red_prio_2", .offset = 0x16, }, - { .name = "rx_red_prio_3", .offset = 0x17, }, - { .name = "rx_red_prio_4", .offset = 0x18, }, - { .name = "rx_red_prio_5", .offset = 0x19, }, - { .name = "rx_red_prio_6", .offset = 0x1A, }, - { .name = "rx_red_prio_7", .offset = 0x1B, }, - { .name = "rx_yellow_prio_0", .offset = 0x1C, }, - { .name = "rx_yellow_prio_1", .offset = 0x1D, }, - { .name = "rx_yellow_prio_2", .offset = 0x1E, }, - { .name = "rx_yellow_prio_3", .offset = 0x1F, }, - { .name = "rx_yellow_prio_4", .offset = 0x20, }, - { .name = "rx_yellow_prio_5", .offset = 0x21, }, - { .name = "rx_yellow_prio_6", .offset = 0x22, }, - { .name = "rx_yellow_prio_7", .offset = 0x23, }, - { .name = "rx_green_prio_0", .offset = 0x24, }, - { .name = "rx_green_prio_1", .offset = 0x25, }, - { .name = "rx_green_prio_2", .offset = 0x26, }, - { .name = "rx_green_prio_3", .offset = 0x27, }, - { .name = "rx_green_prio_4", .offset = 0x28, }, - { .name = "rx_green_prio_5", .offset = 0x29, }, - { .name = "rx_green_prio_6", .offset = 0x2A, }, - { .name = "rx_green_prio_7", .offset = 0x2B, }, - { .name = "tx_octets", .offset = 0x40, }, - { .name = "tx_unicast", .offset = 0x41, }, - { .name = "tx_multicast", .offset = 0x42, }, - { .name = "tx_broadcast", .offset = 0x43, }, - { .name = "tx_collision", .offset = 0x44, }, - { .name = "tx_drops", .offset = 0x45, }, - { .name = "tx_pause", .offset = 0x46, }, - { .name = "tx_frames_below_65_octets", .offset = 0x47, }, - { .name = "tx_frames_65_to_127_octets", .offset = 0x48, }, - { .name = "tx_frames_128_255_octets", .offset = 0x49, }, - { .name = "tx_frames_256_511_octets", .offset = 0x4A, }, - { .name = "tx_frames_512_1023_octets", .offset = 0x4B, }, - { .name = "tx_frames_1024_1526_octets", .offset = 0x4C, }, - { .name = "tx_frames_over_1526_octets", .offset = 0x4D, }, - { .name = "tx_yellow_prio_0", .offset = 0x4E, }, - { .name = "tx_yellow_prio_1", .offset = 0x4F, }, - { .name = "tx_yellow_prio_2", .offset = 0x50, }, - { .name = "tx_yellow_prio_3", .offset = 0x51, }, - { .name = "tx_yellow_prio_4", .offset = 0x52, }, - { .name = "tx_yellow_prio_5", .offset = 0x53, }, - { .name = "tx_yellow_prio_6", .offset = 0x54, }, - { .name = "tx_yellow_prio_7", .offset = 0x55, }, - { .name = "tx_green_prio_0", .offset = 0x56, }, - { .name = "tx_green_prio_1", .offset = 0x57, }, - { .name = "tx_green_prio_2", .offset = 0x58, }, - { .name = "tx_green_prio_3", .offset = 0x59, }, - { .name = "tx_green_prio_4", .offset = 0x5A, }, - { .name = "tx_green_prio_5", .offset = 0x5B, }, - { .name = "tx_green_prio_6", .offset = 0x5C, }, - { .name = "tx_green_prio_7", .offset = 0x5D, }, - { .name = "tx_aged", .offset = 0x5E, }, - { .name = "drop_local", .offset = 0x80, }, - { .name = "drop_tail", .offset = 0x81, }, - { .name = "drop_yellow_prio_0", .offset = 0x82, }, - { .name = "drop_yellow_prio_1", .offset = 0x83, }, - { .name = "drop_yellow_prio_2", .offset = 0x84, }, - { .name = "drop_yellow_prio_3", .offset = 0x85, }, - { .name = "drop_yellow_prio_4", .offset = 0x86, }, - { .name = "drop_yellow_prio_5", .offset = 0x87, }, - { .name = "drop_yellow_prio_6", .offset = 0x88, }, - { .name = "drop_yellow_prio_7", .offset = 0x89, }, - { .name = "drop_green_prio_0", .offset = 0x8A, }, - { .name = "drop_green_prio_1", .offset = 0x8B, }, - { .name = "drop_green_prio_2", .offset = 0x8C, }, - { .name = "drop_green_prio_3", .offset = 0x8D, }, - { .name = "drop_green_prio_4", .offset = 0x8E, }, - { .name = "drop_green_prio_5", .offset = 0x8F, }, - { .name = "drop_green_prio_6", .offset = 0x90, }, - { .name = "drop_green_prio_7", .offset = 0x91, }, - OCELOT_STAT_END +static const struct ocelot_stat_layout ocelot_stats_layout[OCELOT_NUM_STATS] = { + [OCELOT_STAT_RX_OCTETS] = { + .name = "rx_octets", + .offset = 0x00, + }, + [OCELOT_STAT_RX_UNICAST] = { + .name = "rx_unicast", + .offset = 0x01, + }, + [OCELOT_STAT_RX_MULTICAST] = { + .name = "rx_multicast", + .offset = 0x02, + }, + [OCELOT_STAT_RX_BROADCAST] = { + .name = "rx_broadcast", + .offset = 0x03, + }, + [OCELOT_STAT_RX_SHORTS] = { + .name = "rx_shorts", + .offset = 0x04, + }, + [OCELOT_STAT_RX_FRAGMENTS] = { + .name = "rx_fragments", + .offset = 0x05, + }, + [OCELOT_STAT_RX_JABBERS] = { + .name = "rx_jabbers", + .offset = 0x06, + }, + [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { + .name = "rx_crc_align_errs", + .offset = 0x07, + }, + [OCELOT_STAT_RX_SYM_ERRS] = { + .name = "rx_sym_errs", + .offset = 0x08, + }, + [OCELOT_STAT_RX_64] = { + .name = "rx_frames_below_65_octets", + .offset = 0x09, + }, + [OCELOT_STAT_RX_65_127] = { + .name = "rx_frames_65_to_127_octets", + .offset = 0x0A, + }, + [OCELOT_STAT_RX_128_255] = { + .name = "rx_frames_128_to_255_octets", + .offset = 0x0B, + }, + [OCELOT_STAT_RX_256_511] = { + .name = "rx_frames_256_to_511_octets", + .offset = 0x0C, + }, + [OCELOT_STAT_RX_512_1023] = { + .name = "rx_frames_512_to_1023_octets", + .offset = 0x0D, + }, + [OCELOT_STAT_RX_1024_1526] = { + .name = "rx_frames_1024_to_1526_octets", + .offset = 0x0E, + }, + [OCELOT_STAT_RX_1527_MAX] = { + .name = "rx_frames_over_1526_octets", + .offset = 0x0F, + }, + [OCELOT_STAT_RX_PAUSE] = { + .name = "rx_pause", + .offset = 0x10, + }, + [OCELOT_STAT_RX_CONTROL] = { + .name = "rx_control", + .offset = 0x11, + }, + [OCELOT_STAT_RX_LONGS] = { + .name = "rx_longs", + .offset = 0x12, + }, + [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { + .name = "rx_classified_drops", + .offset = 0x13, + }, + [OCELOT_STAT_RX_RED_PRIO_0] = { + .name = "rx_red_prio_0", + .offset = 0x14, + }, + [OCELOT_STAT_RX_RED_PRIO_1] = { + .name = "rx_red_prio_1", + .offset = 0x15, + }, + [OCELOT_STAT_RX_RED_PRIO_2] = { + .name = "rx_red_prio_2", + .offset = 0x16, + }, + [OCELOT_STAT_RX_RED_PRIO_3] = { + .name = "rx_red_prio_3", + .offset = 0x17, + }, + [OCELOT_STAT_RX_RED_PRIO_4] = { + .name = "rx_red_prio_4", + .offset = 0x18, + }, + [OCELOT_STAT_RX_RED_PRIO_5] = { + .name = "rx_red_prio_5", + .offset = 0x19, + }, + [OCELOT_STAT_RX_RED_PRIO_6] = { + .name = "rx_red_prio_6", + .offset = 0x1A, + }, + [OCELOT_STAT_RX_RED_PRIO_7] = { + .name = "rx_red_prio_7", + .offset = 0x1B, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_0] = { + .name = "rx_yellow_prio_0", + .offset = 0x1C, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_1] = { + .name = "rx_yellow_prio_1", + .offset = 0x1D, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_2] = { + .name = "rx_yellow_prio_2", + .offset = 0x1E, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_3] = { + .name = "rx_yellow_prio_3", + .offset = 0x1F, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_4] = { + .name = "rx_yellow_prio_4", + .offset = 0x20, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_5] = { + .name = "rx_yellow_prio_5", + .offset = 0x21, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_6] = { + .name = "rx_yellow_prio_6", + .offset = 0x22, + }, + [OCELOT_STAT_RX_YELLOW_PRIO_7] = { + .name = "rx_yellow_prio_7", + .offset = 0x23, + }, + [OCELOT_STAT_RX_GREEN_PRIO_0] = { + .name = "rx_green_prio_0", + .offset = 0x24, + }, + [OCELOT_STAT_RX_GREEN_PRIO_1] = { + .name = "rx_green_prio_1", + .offset = 0x25, + }, + [OCELOT_STAT_RX_GREEN_PRIO_2] = { + .name = "rx_green_prio_2", + .offset = 0x26, + }, + [OCELOT_STAT_RX_GREEN_PRIO_3] = { + .name = "rx_green_prio_3", + .offset = 0x27, + }, + [OCELOT_STAT_RX_GREEN_PRIO_4] = { + .name = "rx_green_prio_4", + .offset = 0x28, + }, + [OCELOT_STAT_RX_GREEN_PRIO_5] = { + .name = "rx_green_prio_5", + .offset = 0x29, + }, + [OCELOT_STAT_RX_GREEN_PRIO_6] = { + .name = "rx_green_prio_6", + .offset = 0x2A, + }, + [OCELOT_STAT_RX_GREEN_PRIO_7] = { + .name = "rx_green_prio_7", + .offset = 0x2B, + }, + [OCELOT_STAT_TX_OCTETS] = { + .name = "tx_octets", + .offset = 0x40, + }, + [OCELOT_STAT_TX_UNICAST] = { + .name = "tx_unicast", + .offset = 0x41, + }, + [OCELOT_STAT_TX_MULTICAST] = { + .name = "tx_multicast", + .offset = 0x42, + }, + [OCELOT_STAT_TX_BROADCAST] = { + .name = "tx_broadcast", + .offset = 0x43, + }, + [OCELOT_STAT_TX_COLLISION] = { + .name = "tx_collision", + .offset = 0x44, + }, + [OCELOT_STAT_TX_DROPS] = { + .name = "tx_drops", + .offset = 0x45, + }, + [OCELOT_STAT_TX_PAUSE] = { + .name = "tx_pause", + .offset = 0x46, + }, + [OCELOT_STAT_TX_64] = { + .name = "tx_frames_below_65_octets", + .offset = 0x47, + }, + [OCELOT_STAT_TX_65_127] = { + .name = "tx_frames_65_to_127_octets", + .offset = 0x48, + }, + [OCELOT_STAT_TX_128_255] = { + .name = "tx_frames_128_255_octets", + .offset = 0x49, + }, + [OCELOT_STAT_TX_256_511] = { + .name = "tx_frames_256_511_octets", + .offset = 0x4A, + }, + [OCELOT_STAT_TX_512_1023] = { + .name = "tx_frames_512_1023_octets", + .offset = 0x4B, + }, + [OCELOT_STAT_TX_1024_1526] = { + .name = "tx_frames_1024_1526_octets", + .offset = 0x4C, + }, + [OCELOT_STAT_TX_1527_MAX] = { + .name = "tx_frames_over_1526_octets", + .offset = 0x4D, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_0] = { + .name = "tx_yellow_prio_0", + .offset = 0x4E, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_1] = { + .name = "tx_yellow_prio_1", + .offset = 0x4F, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_2] = { + .name = "tx_yellow_prio_2", + .offset = 0x50, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_3] = { + .name = "tx_yellow_prio_3", + .offset = 0x51, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_4] = { + .name = "tx_yellow_prio_4", + .offset = 0x52, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_5] = { + .name = "tx_yellow_prio_5", + .offset = 0x53, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_6] = { + .name = "tx_yellow_prio_6", + .offset = 0x54, + }, + [OCELOT_STAT_TX_YELLOW_PRIO_7] = { + .name = "tx_yellow_prio_7", + .offset = 0x55, + }, + [OCELOT_STAT_TX_GREEN_PRIO_0] = { + .name = "tx_green_prio_0", + .offset = 0x56, + }, + [OCELOT_STAT_TX_GREEN_PRIO_1] = { + .name = "tx_green_prio_1", + .offset = 0x57, + }, + [OCELOT_STAT_TX_GREEN_PRIO_2] = { + .name = "tx_green_prio_2", + .offset = 0x58, + }, + [OCELOT_STAT_TX_GREEN_PRIO_3] = { + .name = "tx_green_prio_3", + .offset = 0x59, + }, + [OCELOT_STAT_TX_GREEN_PRIO_4] = { + .name = "tx_green_prio_4", + .offset = 0x5A, + }, + [OCELOT_STAT_TX_GREEN_PRIO_5] = { + .name = "tx_green_prio_5", + .offset = 0x5B, + }, + [OCELOT_STAT_TX_GREEN_PRIO_6] = { + .name = "tx_green_prio_6", + .offset = 0x5C, + }, + [OCELOT_STAT_TX_GREEN_PRIO_7] = { + .name = "tx_green_prio_7", + .offset = 0x5D, + }, + [OCELOT_STAT_TX_AGED] = { + .name = "tx_aged", + .offset = 0x5E, + }, + [OCELOT_STAT_DROP_LOCAL] = { + .name = "drop_local", + .offset = 0x80, + }, + [OCELOT_STAT_DROP_TAIL] = { + .name = "drop_tail", + .offset = 0x81, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { + .name = "drop_yellow_prio_0", + .offset = 0x82, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { + .name = "drop_yellow_prio_1", + .offset = 0x83, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { + .name = "drop_yellow_prio_2", + .offset = 0x84, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { + .name = "drop_yellow_prio_3", + .offset = 0x85, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { + .name = "drop_yellow_prio_4", + .offset = 0x86, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { + .name = "drop_yellow_prio_5", + .offset = 0x87, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { + .name = "drop_yellow_prio_6", + .offset = 0x88, + }, + [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { + .name = "drop_yellow_prio_7", + .offset = 0x89, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_0] = { + .name = "drop_green_prio_0", + .offset = 0x8A, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_1] = { + .name = "drop_green_prio_1", + .offset = 0x8B, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_2] = { + .name = "drop_green_prio_2", + .offset = 0x8C, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_3] = { + .name = "drop_green_prio_3", + .offset = 0x8D, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_4] = { + .name = "drop_green_prio_4", + .offset = 0x8E, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_5] = { + .name = "drop_green_prio_5", + .offset = 0x8F, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_6] = { + .name = "drop_green_prio_6", + .offset = 0x90, + }, + [OCELOT_STAT_DROP_GREEN_PRIO_7] = { + .name = "drop_green_prio_7", + .offset = 0x91, + }, }; static void ocelot_pll5_init(struct ocelot *ocelot) diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 72b9474391da..2428bc64cb1d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -105,11 +105,6 @@ #define REG_RESERVED_ADDR 0xffffffff #define REG_RESERVED(reg) REG(reg, REG_RESERVED_ADDR) -#define for_each_stat(ocelot, stat) \ - for ((stat) = (ocelot)->stats_layout; \ - ((stat)->name[0] != '\0'); \ - (stat)++) - enum ocelot_target { ANA = 1, QS, @@ -540,13 +535,108 @@ enum ocelot_ptp_pins { TOD_ACC_PIN }; +enum ocelot_stat { + OCELOT_STAT_RX_OCTETS, + OCELOT_STAT_RX_UNICAST, + OCELOT_STAT_RX_MULTICAST, + OCELOT_STAT_RX_BROADCAST, + OCELOT_STAT_RX_SHORTS, + OCELOT_STAT_RX_FRAGMENTS, + OCELOT_STAT_RX_JABBERS, + OCELOT_STAT_RX_CRC_ALIGN_ERRS, + OCELOT_STAT_RX_SYM_ERRS, + OCELOT_STAT_RX_64, + OCELOT_STAT_RX_65_127, + OCELOT_STAT_RX_128_255, + OCELOT_STAT_RX_256_511, + OCELOT_STAT_RX_512_1023, + OCELOT_STAT_RX_1024_1526, + OCELOT_STAT_RX_1527_MAX, + OCELOT_STAT_RX_PAUSE, + OCELOT_STAT_RX_CONTROL, + OCELOT_STAT_RX_LONGS, + OCELOT_STAT_RX_CLASSIFIED_DROPS, + OCELOT_STAT_RX_RED_PRIO_0, + OCELOT_STAT_RX_RED_PRIO_1, + OCELOT_STAT_RX_RED_PRIO_2, + OCELOT_STAT_RX_RED_PRIO_3, + OCELOT_STAT_RX_RED_PRIO_4, + OCELOT_STAT_RX_RED_PRIO_5, + OCELOT_STAT_RX_RED_PRIO_6, + OCELOT_STAT_RX_RED_PRIO_7, + OCELOT_STAT_RX_YELLOW_PRIO_0, + OCELOT_STAT_RX_YELLOW_PRIO_1, + OCELOT_STAT_RX_YELLOW_PRIO_2, + OCELOT_STAT_RX_YELLOW_PRIO_3, + OCELOT_STAT_RX_YELLOW_PRIO_4, + OCELOT_STAT_RX_YELLOW_PRIO_5, + OCELOT_STAT_RX_YELLOW_PRIO_6, + OCELOT_STAT_RX_YELLOW_PRIO_7, + OCELOT_STAT_RX_GREEN_PRIO_0, + OCELOT_STAT_RX_GREEN_PRIO_1, + OCELOT_STAT_RX_GREEN_PRIO_2, + OCELOT_STAT_RX_GREEN_PRIO_3, + OCELOT_STAT_RX_GREEN_PRIO_4, + OCELOT_STAT_RX_GREEN_PRIO_5, + OCELOT_STAT_RX_GREEN_PRIO_6, + OCELOT_STAT_RX_GREEN_PRIO_7, + OCELOT_STAT_TX_OCTETS, + OCELOT_STAT_TX_UNICAST, + OCELOT_STAT_TX_MULTICAST, + OCELOT_STAT_TX_BROADCAST, + OCELOT_STAT_TX_COLLISION, + OCELOT_STAT_TX_DROPS, + OCELOT_STAT_TX_PAUSE, + OCELOT_STAT_TX_64, + OCELOT_STAT_TX_65_127, + OCELOT_STAT_TX_128_255, + OCELOT_STAT_TX_256_511, + OCELOT_STAT_TX_512_1023, + OCELOT_STAT_TX_1024_1526, + OCELOT_STAT_TX_1527_MAX, + OCELOT_STAT_TX_YELLOW_PRIO_0, + OCELOT_STAT_TX_YELLOW_PRIO_1, + OCELOT_STAT_TX_YELLOW_PRIO_2, + OCELOT_STAT_TX_YELLOW_PRIO_3, + OCELOT_STAT_TX_YELLOW_PRIO_4, + OCELOT_STAT_TX_YELLOW_PRIO_5, + OCELOT_STAT_TX_YELLOW_PRIO_6, + OCELOT_STAT_TX_YELLOW_PRIO_7, + OCELOT_STAT_TX_GREEN_PRIO_0, + OCELOT_STAT_TX_GREEN_PRIO_1, + OCELOT_STAT_TX_GREEN_PRIO_2, + OCELOT_STAT_TX_GREEN_PRIO_3, + OCELOT_STAT_TX_GREEN_PRIO_4, + OCELOT_STAT_TX_GREEN_PRIO_5, + OCELOT_STAT_TX_GREEN_PRIO_6, + OCELOT_STAT_TX_GREEN_PRIO_7, + OCELOT_STAT_TX_AGED, + OCELOT_STAT_DROP_LOCAL, + OCELOT_STAT_DROP_TAIL, + OCELOT_STAT_DROP_YELLOW_PRIO_0, + OCELOT_STAT_DROP_YELLOW_PRIO_1, + OCELOT_STAT_DROP_YELLOW_PRIO_2, + OCELOT_STAT_DROP_YELLOW_PRIO_3, + OCELOT_STAT_DROP_YELLOW_PRIO_4, + OCELOT_STAT_DROP_YELLOW_PRIO_5, + OCELOT_STAT_DROP_YELLOW_PRIO_6, + OCELOT_STAT_DROP_YELLOW_PRIO_7, + OCELOT_STAT_DROP_GREEN_PRIO_0, + OCELOT_STAT_DROP_GREEN_PRIO_1, + OCELOT_STAT_DROP_GREEN_PRIO_2, + OCELOT_STAT_DROP_GREEN_PRIO_3, + OCELOT_STAT_DROP_GREEN_PRIO_4, + OCELOT_STAT_DROP_GREEN_PRIO_5, + OCELOT_STAT_DROP_GREEN_PRIO_6, + OCELOT_STAT_DROP_GREEN_PRIO_7, + OCELOT_NUM_STATS, +}; + struct ocelot_stat_layout { u32 offset; char name[ETH_GSTRING_LEN]; }; -#define OCELOT_STAT_END { .name = "" } - struct ocelot_stats_region { struct list_head node; u32 offset; @@ -709,7 +799,6 @@ struct ocelot { const u32 *const *map; const struct ocelot_stat_layout *stats_layout; struct list_head stats_regions; - unsigned int num_stats; u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; -- cgit v1.2.3 From d4c367650704de091d4c1f6bb379c0a5c389c73a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 16 Aug 2022 16:53:51 +0300 Subject: net: mscc: ocelot: keep ocelot_stat_layout by reg address, not offset With so many counter addresses recently discovered as being wrong, it is desirable to at least have a central database of information, rather than two: one through the SYS_COUNT_* registers (used for ndo_get_stats64), and the other through the offset field of struct ocelot_stat_layout elements (used for ethtool -S). The strategy will be to keep the SYS_COUNT_* definitions as the single source of truth, but for that we need to expand our current definitions to cover all registers. Then we need to convert the ocelot region creation logic, and stats worker, to the read semantics imposed by going through SYS_COUNT_* absolute register addresses, rather than offsets of 32-bit words relative to SYS_COUNT_RX_OCTETS (which should have been SYS_CNT, by the way). Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 253 +++++++++++++++++----------- drivers/net/dsa/ocelot/seville_vsc9953.c | 255 ++++++++++++++++++----------- drivers/net/ethernet/mscc/ocelot.c | 11 +- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 186 ++++++++++----------- drivers/net/ethernet/mscc/vsc7514_regs.c | 58 +++++++ include/soc/mscc/ocelot.h | 66 +++++++- 6 files changed, 540 insertions(+), 289 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index c9f270f24b1c..1cdce8a98d1d 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -274,10 +274,14 @@ static const u32 vsc9959_rew_regmap[] = { static const u32 vsc9959_sys_regmap[] = { REG(SYS_COUNT_RX_OCTETS, 0x000000), + REG(SYS_COUNT_RX_UNICAST, 0x000004), REG(SYS_COUNT_RX_MULTICAST, 0x000008), + REG(SYS_COUNT_RX_BROADCAST, 0x00000c), REG(SYS_COUNT_RX_SHORTS, 0x000010), REG(SYS_COUNT_RX_FRAGMENTS, 0x000014), REG(SYS_COUNT_RX_JABBERS, 0x000018), + REG(SYS_COUNT_RX_CRC_ALIGN_ERRS, 0x00001c), + REG(SYS_COUNT_RX_SYM_ERRS, 0x000020), REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), @@ -288,9 +292,38 @@ static const u32 vsc9959_sys_regmap[] = { REG(SYS_COUNT_RX_PAUSE, 0x000040), REG(SYS_COUNT_RX_CONTROL, 0x000044), REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000200), + REG(SYS_COUNT_TX_UNICAST, 0x000204), + REG(SYS_COUNT_TX_MULTICAST, 0x000208), + REG(SYS_COUNT_TX_BROADCAST, 0x00020c), REG(SYS_COUNT_TX_COLLISION, 0x000210), REG(SYS_COUNT_TX_DROPS, 0x000214), + REG(SYS_COUNT_TX_PAUSE, 0x000218), REG(SYS_COUNT_TX_64, 0x00021c), REG(SYS_COUNT_TX_65_127, 0x000220), REG(SYS_COUNT_TX_128_255, 0x000224), @@ -298,7 +331,41 @@ static const u32 vsc9959_sys_regmap[] = { REG(SYS_COUNT_TX_512_1023, 0x00022c), REG(SYS_COUNT_TX_1024_1526, 0x000230), REG(SYS_COUNT_TX_1527_MAX, 0x000234), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000238), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00023c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000240), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000244), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000248), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00024c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000250), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000254), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000258), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00025c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000260), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000264), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000268), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00026c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000270), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000274), REG(SYS_COUNT_TX_AGING, 0x000278), + REG(SYS_COUNT_DROP_LOCAL, 0x000400), + REG(SYS_COUNT_DROP_TAIL, 0x000404), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000408), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00040c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000410), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000414), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000418), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00041c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000420), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000424), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000428), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00042c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000430), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000434), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000438), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00043c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000440), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000444), REG(SYS_RESET_CFG, 0x000e00), REG(SYS_SR_ETYPE_CFG, 0x000e04), REG(SYS_VLAN_ETYPE_CFG, 0x000e08), @@ -554,375 +621,375 @@ static const struct reg_field vsc9959_regfields[REGFIELD_MAX] = { static const struct ocelot_stat_layout vsc9959_stats_layout[OCELOT_NUM_STATS] = { [OCELOT_STAT_RX_OCTETS] = { .name = "rx_octets", - .offset = 0x00, + .reg = SYS_COUNT_RX_OCTETS, }, [OCELOT_STAT_RX_UNICAST] = { .name = "rx_unicast", - .offset = 0x01, + .reg = SYS_COUNT_RX_UNICAST, }, [OCELOT_STAT_RX_MULTICAST] = { .name = "rx_multicast", - .offset = 0x02, + .reg = SYS_COUNT_RX_MULTICAST, }, [OCELOT_STAT_RX_BROADCAST] = { .name = "rx_broadcast", - .offset = 0x03, + .reg = SYS_COUNT_RX_BROADCAST, }, [OCELOT_STAT_RX_SHORTS] = { .name = "rx_shorts", - .offset = 0x04, + .reg = SYS_COUNT_RX_SHORTS, }, [OCELOT_STAT_RX_FRAGMENTS] = { .name = "rx_fragments", - .offset = 0x05, + .reg = SYS_COUNT_RX_FRAGMENTS, }, [OCELOT_STAT_RX_JABBERS] = { .name = "rx_jabbers", - .offset = 0x06, + .reg = SYS_COUNT_RX_JABBERS, }, [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { .name = "rx_crc_align_errs", - .offset = 0x07, + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, }, [OCELOT_STAT_RX_SYM_ERRS] = { .name = "rx_sym_errs", - .offset = 0x08, + .reg = SYS_COUNT_RX_SYM_ERRS, }, [OCELOT_STAT_RX_64] = { .name = "rx_frames_below_65_octets", - .offset = 0x09, + .reg = SYS_COUNT_RX_64, }, [OCELOT_STAT_RX_65_127] = { .name = "rx_frames_65_to_127_octets", - .offset = 0x0A, + .reg = SYS_COUNT_RX_65_127, }, [OCELOT_STAT_RX_128_255] = { .name = "rx_frames_128_to_255_octets", - .offset = 0x0B, + .reg = SYS_COUNT_RX_128_255, }, [OCELOT_STAT_RX_256_511] = { .name = "rx_frames_256_to_511_octets", - .offset = 0x0C, + .reg = SYS_COUNT_RX_256_511, }, [OCELOT_STAT_RX_512_1023] = { .name = "rx_frames_512_to_1023_octets", - .offset = 0x0D, + .reg = SYS_COUNT_RX_512_1023, }, [OCELOT_STAT_RX_1024_1526] = { .name = "rx_frames_1024_to_1526_octets", - .offset = 0x0E, + .reg = SYS_COUNT_RX_1024_1526, }, [OCELOT_STAT_RX_1527_MAX] = { .name = "rx_frames_over_1526_octets", - .offset = 0x0F, + .reg = SYS_COUNT_RX_1527_MAX, }, [OCELOT_STAT_RX_PAUSE] = { .name = "rx_pause", - .offset = 0x10, + .reg = SYS_COUNT_RX_PAUSE, }, [OCELOT_STAT_RX_CONTROL] = { .name = "rx_control", - .offset = 0x11, + .reg = SYS_COUNT_RX_CONTROL, }, [OCELOT_STAT_RX_LONGS] = { .name = "rx_longs", - .offset = 0x12, + .reg = SYS_COUNT_RX_LONGS, }, [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { .name = "rx_classified_drops", - .offset = 0x13, + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, }, [OCELOT_STAT_RX_RED_PRIO_0] = { .name = "rx_red_prio_0", - .offset = 0x14, + .reg = SYS_COUNT_RX_RED_PRIO_0, }, [OCELOT_STAT_RX_RED_PRIO_1] = { .name = "rx_red_prio_1", - .offset = 0x15, + .reg = SYS_COUNT_RX_RED_PRIO_1, }, [OCELOT_STAT_RX_RED_PRIO_2] = { .name = "rx_red_prio_2", - .offset = 0x16, + .reg = SYS_COUNT_RX_RED_PRIO_2, }, [OCELOT_STAT_RX_RED_PRIO_3] = { .name = "rx_red_prio_3", - .offset = 0x17, + .reg = SYS_COUNT_RX_RED_PRIO_3, }, [OCELOT_STAT_RX_RED_PRIO_4] = { .name = "rx_red_prio_4", - .offset = 0x18, + .reg = SYS_COUNT_RX_RED_PRIO_4, }, [OCELOT_STAT_RX_RED_PRIO_5] = { .name = "rx_red_prio_5", - .offset = 0x19, + .reg = SYS_COUNT_RX_RED_PRIO_5, }, [OCELOT_STAT_RX_RED_PRIO_6] = { .name = "rx_red_prio_6", - .offset = 0x1A, + .reg = SYS_COUNT_RX_RED_PRIO_6, }, [OCELOT_STAT_RX_RED_PRIO_7] = { .name = "rx_red_prio_7", - .offset = 0x1B, + .reg = SYS_COUNT_RX_RED_PRIO_7, }, [OCELOT_STAT_RX_YELLOW_PRIO_0] = { .name = "rx_yellow_prio_0", - .offset = 0x1C, + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, }, [OCELOT_STAT_RX_YELLOW_PRIO_1] = { .name = "rx_yellow_prio_1", - .offset = 0x1D, + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, }, [OCELOT_STAT_RX_YELLOW_PRIO_2] = { .name = "rx_yellow_prio_2", - .offset = 0x1E, + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, }, [OCELOT_STAT_RX_YELLOW_PRIO_3] = { .name = "rx_yellow_prio_3", - .offset = 0x1F, + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, }, [OCELOT_STAT_RX_YELLOW_PRIO_4] = { .name = "rx_yellow_prio_4", - .offset = 0x20, + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, }, [OCELOT_STAT_RX_YELLOW_PRIO_5] = { .name = "rx_yellow_prio_5", - .offset = 0x21, + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, }, [OCELOT_STAT_RX_YELLOW_PRIO_6] = { .name = "rx_yellow_prio_6", - .offset = 0x22, + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, }, [OCELOT_STAT_RX_YELLOW_PRIO_7] = { .name = "rx_yellow_prio_7", - .offset = 0x23, + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, }, [OCELOT_STAT_RX_GREEN_PRIO_0] = { .name = "rx_green_prio_0", - .offset = 0x24, + .reg = SYS_COUNT_RX_GREEN_PRIO_0, }, [OCELOT_STAT_RX_GREEN_PRIO_1] = { .name = "rx_green_prio_1", - .offset = 0x25, + .reg = SYS_COUNT_RX_GREEN_PRIO_1, }, [OCELOT_STAT_RX_GREEN_PRIO_2] = { .name = "rx_green_prio_2", - .offset = 0x26, + .reg = SYS_COUNT_RX_GREEN_PRIO_2, }, [OCELOT_STAT_RX_GREEN_PRIO_3] = { .name = "rx_green_prio_3", - .offset = 0x27, + .reg = SYS_COUNT_RX_GREEN_PRIO_3, }, [OCELOT_STAT_RX_GREEN_PRIO_4] = { .name = "rx_green_prio_4", - .offset = 0x28, + .reg = SYS_COUNT_RX_GREEN_PRIO_4, }, [OCELOT_STAT_RX_GREEN_PRIO_5] = { .name = "rx_green_prio_5", - .offset = 0x29, + .reg = SYS_COUNT_RX_GREEN_PRIO_5, }, [OCELOT_STAT_RX_GREEN_PRIO_6] = { .name = "rx_green_prio_6", - .offset = 0x2A, + .reg = SYS_COUNT_RX_GREEN_PRIO_6, }, [OCELOT_STAT_RX_GREEN_PRIO_7] = { .name = "rx_green_prio_7", - .offset = 0x2B, + .reg = SYS_COUNT_RX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_OCTETS] = { .name = "tx_octets", - .offset = 0x80, + .reg = SYS_COUNT_TX_OCTETS, }, [OCELOT_STAT_TX_UNICAST] = { .name = "tx_unicast", - .offset = 0x81, + .reg = SYS_COUNT_TX_UNICAST, }, [OCELOT_STAT_TX_MULTICAST] = { .name = "tx_multicast", - .offset = 0x82, + .reg = SYS_COUNT_TX_MULTICAST, }, [OCELOT_STAT_TX_BROADCAST] = { .name = "tx_broadcast", - .offset = 0x83, + .reg = SYS_COUNT_TX_BROADCAST, }, [OCELOT_STAT_TX_COLLISION] = { .name = "tx_collision", - .offset = 0x84, + .reg = SYS_COUNT_TX_COLLISION, }, [OCELOT_STAT_TX_DROPS] = { .name = "tx_drops", - .offset = 0x85, + .reg = SYS_COUNT_TX_DROPS, }, [OCELOT_STAT_TX_PAUSE] = { .name = "tx_pause", - .offset = 0x86, + .reg = SYS_COUNT_TX_PAUSE, }, [OCELOT_STAT_TX_64] = { .name = "tx_frames_below_65_octets", - .offset = 0x87, + .reg = SYS_COUNT_TX_64, }, [OCELOT_STAT_TX_65_127] = { .name = "tx_frames_65_to_127_octets", - .offset = 0x88, + .reg = SYS_COUNT_TX_65_127, }, [OCELOT_STAT_TX_128_255] = { .name = "tx_frames_128_255_octets", - .offset = 0x89, + .reg = SYS_COUNT_TX_128_255, }, [OCELOT_STAT_TX_256_511] = { .name = "tx_frames_256_511_octets", - .offset = 0x8A, + .reg = SYS_COUNT_TX_256_511, }, [OCELOT_STAT_TX_512_1023] = { .name = "tx_frames_512_1023_octets", - .offset = 0x8B, + .reg = SYS_COUNT_TX_512_1023, }, [OCELOT_STAT_TX_1024_1526] = { .name = "tx_frames_1024_1526_octets", - .offset = 0x8C, + .reg = SYS_COUNT_TX_1024_1526, }, [OCELOT_STAT_TX_1527_MAX] = { .name = "tx_frames_over_1526_octets", - .offset = 0x8D, + .reg = SYS_COUNT_TX_1527_MAX, }, [OCELOT_STAT_TX_YELLOW_PRIO_0] = { .name = "tx_yellow_prio_0", - .offset = 0x8E, + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, }, [OCELOT_STAT_TX_YELLOW_PRIO_1] = { .name = "tx_yellow_prio_1", - .offset = 0x8F, + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, }, [OCELOT_STAT_TX_YELLOW_PRIO_2] = { .name = "tx_yellow_prio_2", - .offset = 0x90, + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, }, [OCELOT_STAT_TX_YELLOW_PRIO_3] = { .name = "tx_yellow_prio_3", - .offset = 0x91, + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, }, [OCELOT_STAT_TX_YELLOW_PRIO_4] = { .name = "tx_yellow_prio_4", - .offset = 0x92, + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, }, [OCELOT_STAT_TX_YELLOW_PRIO_5] = { .name = "tx_yellow_prio_5", - .offset = 0x93, + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, }, [OCELOT_STAT_TX_YELLOW_PRIO_6] = { .name = "tx_yellow_prio_6", - .offset = 0x94, + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, }, [OCELOT_STAT_TX_YELLOW_PRIO_7] = { .name = "tx_yellow_prio_7", - .offset = 0x95, + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, }, [OCELOT_STAT_TX_GREEN_PRIO_0] = { .name = "tx_green_prio_0", - .offset = 0x96, + .reg = SYS_COUNT_TX_GREEN_PRIO_0, }, [OCELOT_STAT_TX_GREEN_PRIO_1] = { .name = "tx_green_prio_1", - .offset = 0x97, + .reg = SYS_COUNT_TX_GREEN_PRIO_1, }, [OCELOT_STAT_TX_GREEN_PRIO_2] = { .name = "tx_green_prio_2", - .offset = 0x98, + .reg = SYS_COUNT_TX_GREEN_PRIO_2, }, [OCELOT_STAT_TX_GREEN_PRIO_3] = { .name = "tx_green_prio_3", - .offset = 0x99, + .reg = SYS_COUNT_TX_GREEN_PRIO_3, }, [OCELOT_STAT_TX_GREEN_PRIO_4] = { .name = "tx_green_prio_4", - .offset = 0x9A, + .reg = SYS_COUNT_TX_GREEN_PRIO_4, }, [OCELOT_STAT_TX_GREEN_PRIO_5] = { .name = "tx_green_prio_5", - .offset = 0x9B, + .reg = SYS_COUNT_TX_GREEN_PRIO_5, }, [OCELOT_STAT_TX_GREEN_PRIO_6] = { .name = "tx_green_prio_6", - .offset = 0x9C, + .reg = SYS_COUNT_TX_GREEN_PRIO_6, }, [OCELOT_STAT_TX_GREEN_PRIO_7] = { .name = "tx_green_prio_7", - .offset = 0x9D, + .reg = SYS_COUNT_TX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_AGED] = { .name = "tx_aged", - .offset = 0x9E, + .reg = SYS_COUNT_TX_AGING, }, [OCELOT_STAT_DROP_LOCAL] = { .name = "drop_local", - .offset = 0x100, + .reg = SYS_COUNT_DROP_LOCAL, }, [OCELOT_STAT_DROP_TAIL] = { .name = "drop_tail", - .offset = 0x101, + .reg = SYS_COUNT_DROP_TAIL, }, [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { .name = "drop_yellow_prio_0", - .offset = 0x102, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, }, [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { .name = "drop_yellow_prio_1", - .offset = 0x103, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, }, [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { .name = "drop_yellow_prio_2", - .offset = 0x104, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, }, [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { .name = "drop_yellow_prio_3", - .offset = 0x105, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, }, [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { .name = "drop_yellow_prio_4", - .offset = 0x106, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, }, [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { .name = "drop_yellow_prio_5", - .offset = 0x107, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, }, [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { .name = "drop_yellow_prio_6", - .offset = 0x108, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, }, [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { .name = "drop_yellow_prio_7", - .offset = 0x109, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, }, [OCELOT_STAT_DROP_GREEN_PRIO_0] = { .name = "drop_green_prio_0", - .offset = 0x10A, + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, }, [OCELOT_STAT_DROP_GREEN_PRIO_1] = { .name = "drop_green_prio_1", - .offset = 0x10B, + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, }, [OCELOT_STAT_DROP_GREEN_PRIO_2] = { .name = "drop_green_prio_2", - .offset = 0x10C, + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, }, [OCELOT_STAT_DROP_GREEN_PRIO_3] = { .name = "drop_green_prio_3", - .offset = 0x10D, + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, }, [OCELOT_STAT_DROP_GREEN_PRIO_4] = { .name = "drop_green_prio_4", - .offset = 0x10E, + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, }, [OCELOT_STAT_DROP_GREEN_PRIO_5] = { .name = "drop_green_prio_5", - .offset = 0x10F, + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, }, [OCELOT_STAT_DROP_GREEN_PRIO_6] = { .name = "drop_green_prio_6", - .offset = 0x110, + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, }, [OCELOT_STAT_DROP_GREEN_PRIO_7] = { .name = "drop_green_prio_7", - .offset = 0x111, + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, }, }; diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index fe5d4642d0bc..b34f4cdfe814 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -270,10 +270,14 @@ static const u32 vsc9953_rew_regmap[] = { static const u32 vsc9953_sys_regmap[] = { REG(SYS_COUNT_RX_OCTETS, 0x000000), + REG(SYS_COUNT_RX_UNICAST, 0x000004), REG(SYS_COUNT_RX_MULTICAST, 0x000008), + REG(SYS_COUNT_RX_BROADCAST, 0x00000c), REG(SYS_COUNT_RX_SHORTS, 0x000010), REG(SYS_COUNT_RX_FRAGMENTS, 0x000014), REG(SYS_COUNT_RX_JABBERS, 0x000018), + REG(SYS_COUNT_RX_CRC_ALIGN_ERRS, 0x00001c), + REG(SYS_COUNT_RX_SYM_ERRS, 0x000020), REG(SYS_COUNT_RX_64, 0x000024), REG(SYS_COUNT_RX_65_127, 0x000028), REG(SYS_COUNT_RX_128_255, 0x00002c), @@ -281,10 +285,41 @@ static const u32 vsc9953_sys_regmap[] = { REG(SYS_COUNT_RX_512_1023, 0x000034), REG(SYS_COUNT_RX_1024_1526, 0x000038), REG(SYS_COUNT_RX_1527_MAX, 0x00003c), + REG(SYS_COUNT_RX_PAUSE, 0x000040), + REG(SYS_COUNT_RX_CONTROL, 0x000044), REG(SYS_COUNT_RX_LONGS, 0x000048), + REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000100), + REG(SYS_COUNT_TX_UNICAST, 0x000104), + REG(SYS_COUNT_TX_MULTICAST, 0x000108), + REG(SYS_COUNT_TX_BROADCAST, 0x00010c), REG(SYS_COUNT_TX_COLLISION, 0x000110), REG(SYS_COUNT_TX_DROPS, 0x000114), + REG(SYS_COUNT_TX_PAUSE, 0x000118), REG(SYS_COUNT_TX_64, 0x00011c), REG(SYS_COUNT_TX_65_127, 0x000120), REG(SYS_COUNT_TX_128_255, 0x000124), @@ -292,7 +327,41 @@ static const u32 vsc9953_sys_regmap[] = { REG(SYS_COUNT_TX_512_1023, 0x00012c), REG(SYS_COUNT_TX_1024_1526, 0x000130), REG(SYS_COUNT_TX_1527_MAX, 0x000134), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000138), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00013c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000140), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000144), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000148), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00014c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000150), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000154), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000158), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00015c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000160), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000164), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000168), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00016c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000170), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000174), REG(SYS_COUNT_TX_AGING, 0x000178), + REG(SYS_COUNT_DROP_LOCAL, 0x000200), + REG(SYS_COUNT_DROP_TAIL, 0x000204), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000208), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00020c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000210), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000214), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000224), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000228), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00022c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000230), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000234), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000238), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00023c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000240), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000244), REG(SYS_RESET_CFG, 0x000318), REG_RESERVED(SYS_SR_ETYPE_CFG), REG(SYS_VLAN_ETYPE_CFG, 0x000320), @@ -548,375 +617,375 @@ static const struct reg_field vsc9953_regfields[REGFIELD_MAX] = { static const struct ocelot_stat_layout vsc9953_stats_layout[OCELOT_NUM_STATS] = { [OCELOT_STAT_RX_OCTETS] = { .name = "rx_octets", - .offset = 0x00, + .reg = SYS_COUNT_RX_OCTETS, }, [OCELOT_STAT_RX_UNICAST] = { .name = "rx_unicast", - .offset = 0x01, + .reg = SYS_COUNT_RX_UNICAST, }, [OCELOT_STAT_RX_MULTICAST] = { .name = "rx_multicast", - .offset = 0x02, + .reg = SYS_COUNT_RX_MULTICAST, }, [OCELOT_STAT_RX_BROADCAST] = { .name = "rx_broadcast", - .offset = 0x03, + .reg = SYS_COUNT_RX_BROADCAST, }, [OCELOT_STAT_RX_SHORTS] = { .name = "rx_shorts", - .offset = 0x04, + .reg = SYS_COUNT_RX_SHORTS, }, [OCELOT_STAT_RX_FRAGMENTS] = { .name = "rx_fragments", - .offset = 0x05, + .reg = SYS_COUNT_RX_FRAGMENTS, }, [OCELOT_STAT_RX_JABBERS] = { .name = "rx_jabbers", - .offset = 0x06, + .reg = SYS_COUNT_RX_JABBERS, }, [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { .name = "rx_crc_align_errs", - .offset = 0x07, + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, }, [OCELOT_STAT_RX_SYM_ERRS] = { .name = "rx_sym_errs", - .offset = 0x08, + .reg = SYS_COUNT_RX_SYM_ERRS, }, [OCELOT_STAT_RX_64] = { .name = "rx_frames_below_65_octets", - .offset = 0x09, + .reg = SYS_COUNT_RX_64, }, [OCELOT_STAT_RX_65_127] = { .name = "rx_frames_65_to_127_octets", - .offset = 0x0A, + .reg = SYS_COUNT_RX_65_127, }, [OCELOT_STAT_RX_128_255] = { .name = "rx_frames_128_to_255_octets", - .offset = 0x0B, + .reg = SYS_COUNT_RX_128_255, }, [OCELOT_STAT_RX_256_511] = { .name = "rx_frames_256_to_511_octets", - .offset = 0x0C, + .reg = SYS_COUNT_RX_256_511, }, [OCELOT_STAT_RX_512_1023] = { .name = "rx_frames_512_to_1023_octets", - .offset = 0x0D, + .reg = SYS_COUNT_RX_512_1023, }, [OCELOT_STAT_RX_1024_1526] = { .name = "rx_frames_1024_to_1526_octets", - .offset = 0x0E, + .reg = SYS_COUNT_RX_1024_1526, }, [OCELOT_STAT_RX_1527_MAX] = { .name = "rx_frames_over_1526_octets", - .offset = 0x0F, + .reg = SYS_COUNT_RX_1527_MAX, }, [OCELOT_STAT_RX_PAUSE] = { .name = "rx_pause", - .offset = 0x10, + .reg = SYS_COUNT_RX_PAUSE, }, [OCELOT_STAT_RX_CONTROL] = { .name = "rx_control", - .offset = 0x11, + .reg = SYS_COUNT_RX_CONTROL, }, [OCELOT_STAT_RX_LONGS] = { .name = "rx_longs", - .offset = 0x12, + .reg = SYS_COUNT_RX_LONGS, }, [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { .name = "rx_classified_drops", - .offset = 0x13, + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, }, [OCELOT_STAT_RX_RED_PRIO_0] = { .name = "rx_red_prio_0", - .offset = 0x14, + .reg = SYS_COUNT_RX_RED_PRIO_0, }, [OCELOT_STAT_RX_RED_PRIO_1] = { .name = "rx_red_prio_1", - .offset = 0x15, + .reg = SYS_COUNT_RX_RED_PRIO_1, }, [OCELOT_STAT_RX_RED_PRIO_2] = { .name = "rx_red_prio_2", - .offset = 0x16, + .reg = SYS_COUNT_RX_RED_PRIO_2, }, [OCELOT_STAT_RX_RED_PRIO_3] = { .name = "rx_red_prio_3", - .offset = 0x17, + .reg = SYS_COUNT_RX_RED_PRIO_3, }, [OCELOT_STAT_RX_RED_PRIO_4] = { .name = "rx_red_prio_4", - .offset = 0x18, + .reg = SYS_COUNT_RX_RED_PRIO_4, }, [OCELOT_STAT_RX_RED_PRIO_5] = { .name = "rx_red_prio_5", - .offset = 0x19, + .reg = SYS_COUNT_RX_RED_PRIO_5, }, [OCELOT_STAT_RX_RED_PRIO_6] = { .name = "rx_red_prio_6", - .offset = 0x1A, + .reg = SYS_COUNT_RX_RED_PRIO_6, }, [OCELOT_STAT_RX_RED_PRIO_7] = { .name = "rx_red_prio_7", - .offset = 0x1B, + .reg = SYS_COUNT_RX_RED_PRIO_7, }, [OCELOT_STAT_RX_YELLOW_PRIO_0] = { .name = "rx_yellow_prio_0", - .offset = 0x1C, + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, }, [OCELOT_STAT_RX_YELLOW_PRIO_1] = { .name = "rx_yellow_prio_1", - .offset = 0x1D, + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, }, [OCELOT_STAT_RX_YELLOW_PRIO_2] = { .name = "rx_yellow_prio_2", - .offset = 0x1E, + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, }, [OCELOT_STAT_RX_YELLOW_PRIO_3] = { .name = "rx_yellow_prio_3", - .offset = 0x1F, + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, }, [OCELOT_STAT_RX_YELLOW_PRIO_4] = { .name = "rx_yellow_prio_4", - .offset = 0x20, + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, }, [OCELOT_STAT_RX_YELLOW_PRIO_5] = { .name = "rx_yellow_prio_5", - .offset = 0x21, + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, }, [OCELOT_STAT_RX_YELLOW_PRIO_6] = { .name = "rx_yellow_prio_6", - .offset = 0x22, + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, }, [OCELOT_STAT_RX_YELLOW_PRIO_7] = { .name = "rx_yellow_prio_7", - .offset = 0x23, + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, }, [OCELOT_STAT_RX_GREEN_PRIO_0] = { .name = "rx_green_prio_0", - .offset = 0x24, + .reg = SYS_COUNT_RX_GREEN_PRIO_0, }, [OCELOT_STAT_RX_GREEN_PRIO_1] = { .name = "rx_green_prio_1", - .offset = 0x25, + .reg = SYS_COUNT_RX_GREEN_PRIO_1, }, [OCELOT_STAT_RX_GREEN_PRIO_2] = { .name = "rx_green_prio_2", - .offset = 0x26, + .reg = SYS_COUNT_RX_GREEN_PRIO_2, }, [OCELOT_STAT_RX_GREEN_PRIO_3] = { .name = "rx_green_prio_3", - .offset = 0x27, + .reg = SYS_COUNT_RX_GREEN_PRIO_3, }, [OCELOT_STAT_RX_GREEN_PRIO_4] = { .name = "rx_green_prio_4", - .offset = 0x28, + .reg = SYS_COUNT_RX_GREEN_PRIO_4, }, [OCELOT_STAT_RX_GREEN_PRIO_5] = { .name = "rx_green_prio_5", - .offset = 0x29, + .reg = SYS_COUNT_RX_GREEN_PRIO_5, }, [OCELOT_STAT_RX_GREEN_PRIO_6] = { .name = "rx_green_prio_6", - .offset = 0x2A, + .reg = SYS_COUNT_RX_GREEN_PRIO_6, }, [OCELOT_STAT_RX_GREEN_PRIO_7] = { .name = "rx_green_prio_7", - .offset = 0x2B, + .reg = SYS_COUNT_RX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_OCTETS] = { .name = "tx_octets", - .offset = 0x40, + .reg = SYS_COUNT_TX_OCTETS, }, [OCELOT_STAT_TX_UNICAST] = { .name = "tx_unicast", - .offset = 0x41, + .reg = SYS_COUNT_TX_UNICAST, }, [OCELOT_STAT_TX_MULTICAST] = { .name = "tx_multicast", - .offset = 0x42, + .reg = SYS_COUNT_TX_MULTICAST, }, [OCELOT_STAT_TX_BROADCAST] = { .name = "tx_broadcast", - .offset = 0x43, + .reg = SYS_COUNT_TX_BROADCAST, }, [OCELOT_STAT_TX_COLLISION] = { .name = "tx_collision", - .offset = 0x44, + .reg = SYS_COUNT_TX_COLLISION, }, [OCELOT_STAT_TX_DROPS] = { .name = "tx_drops", - .offset = 0x45, + .reg = SYS_COUNT_TX_DROPS, }, [OCELOT_STAT_TX_PAUSE] = { .name = "tx_pause", - .offset = 0x46, + .reg = SYS_COUNT_TX_PAUSE, }, [OCELOT_STAT_TX_64] = { .name = "tx_frames_below_65_octets", - .offset = 0x47, + .reg = SYS_COUNT_TX_64, }, [OCELOT_STAT_TX_65_127] = { .name = "tx_frames_65_to_127_octets", - .offset = 0x48, + .reg = SYS_COUNT_TX_65_127, }, [OCELOT_STAT_TX_128_255] = { .name = "tx_frames_128_255_octets", - .offset = 0x49, + .reg = SYS_COUNT_TX_128_255, }, [OCELOT_STAT_TX_256_511] = { .name = "tx_frames_256_511_octets", - .offset = 0x4A, + .reg = SYS_COUNT_TX_256_511, }, [OCELOT_STAT_TX_512_1023] = { .name = "tx_frames_512_1023_octets", - .offset = 0x4B, + .reg = SYS_COUNT_TX_512_1023, }, [OCELOT_STAT_TX_1024_1526] = { .name = "tx_frames_1024_1526_octets", - .offset = 0x4C, + .reg = SYS_COUNT_TX_1024_1526, }, [OCELOT_STAT_TX_1527_MAX] = { .name = "tx_frames_over_1526_octets", - .offset = 0x4D, + .reg = SYS_COUNT_TX_1527_MAX, }, [OCELOT_STAT_TX_YELLOW_PRIO_0] = { .name = "tx_yellow_prio_0", - .offset = 0x4E, + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, }, [OCELOT_STAT_TX_YELLOW_PRIO_1] = { .name = "tx_yellow_prio_1", - .offset = 0x4F, + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, }, [OCELOT_STAT_TX_YELLOW_PRIO_2] = { .name = "tx_yellow_prio_2", - .offset = 0x50, + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, }, [OCELOT_STAT_TX_YELLOW_PRIO_3] = { .name = "tx_yellow_prio_3", - .offset = 0x51, + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, }, [OCELOT_STAT_TX_YELLOW_PRIO_4] = { .name = "tx_yellow_prio_4", - .offset = 0x52, + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, }, [OCELOT_STAT_TX_YELLOW_PRIO_5] = { .name = "tx_yellow_prio_5", - .offset = 0x53, + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, }, [OCELOT_STAT_TX_YELLOW_PRIO_6] = { .name = "tx_yellow_prio_6", - .offset = 0x54, + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, }, [OCELOT_STAT_TX_YELLOW_PRIO_7] = { .name = "tx_yellow_prio_7", - .offset = 0x55, + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, }, [OCELOT_STAT_TX_GREEN_PRIO_0] = { .name = "tx_green_prio_0", - .offset = 0x56, + .reg = SYS_COUNT_TX_GREEN_PRIO_0, }, [OCELOT_STAT_TX_GREEN_PRIO_1] = { .name = "tx_green_prio_1", - .offset = 0x57, + .reg = SYS_COUNT_TX_GREEN_PRIO_1, }, [OCELOT_STAT_TX_GREEN_PRIO_2] = { .name = "tx_green_prio_2", - .offset = 0x58, + .reg = SYS_COUNT_TX_GREEN_PRIO_2, }, [OCELOT_STAT_TX_GREEN_PRIO_3] = { .name = "tx_green_prio_3", - .offset = 0x59, + .reg = SYS_COUNT_TX_GREEN_PRIO_3, }, [OCELOT_STAT_TX_GREEN_PRIO_4] = { .name = "tx_green_prio_4", - .offset = 0x5A, + .reg = SYS_COUNT_TX_GREEN_PRIO_4, }, [OCELOT_STAT_TX_GREEN_PRIO_5] = { .name = "tx_green_prio_5", - .offset = 0x5B, + .reg = SYS_COUNT_TX_GREEN_PRIO_5, }, [OCELOT_STAT_TX_GREEN_PRIO_6] = { .name = "tx_green_prio_6", - .offset = 0x5C, + .reg = SYS_COUNT_TX_GREEN_PRIO_6, }, [OCELOT_STAT_TX_GREEN_PRIO_7] = { .name = "tx_green_prio_7", - .offset = 0x5D, + .reg = SYS_COUNT_TX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_AGED] = { .name = "tx_aged", - .offset = 0x5E, + .reg = SYS_COUNT_TX_AGING, }, [OCELOT_STAT_DROP_LOCAL] = { .name = "drop_local", - .offset = 0x80, + .reg = SYS_COUNT_DROP_LOCAL, }, [OCELOT_STAT_DROP_TAIL] = { .name = "drop_tail", - .offset = 0x81, + .reg = SYS_COUNT_DROP_TAIL, }, [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { .name = "drop_yellow_prio_0", - .offset = 0x82, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, }, [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { .name = "drop_yellow_prio_1", - .offset = 0x83, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, }, [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { .name = "drop_yellow_prio_2", - .offset = 0x84, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, }, [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { .name = "drop_yellow_prio_3", - .offset = 0x85, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, }, [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { .name = "drop_yellow_prio_4", - .offset = 0x86, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, }, [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { .name = "drop_yellow_prio_5", - .offset = 0x87, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, }, [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { .name = "drop_yellow_prio_6", - .offset = 0x88, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, }, [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { .name = "drop_yellow_prio_7", - .offset = 0x89, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, }, [OCELOT_STAT_DROP_GREEN_PRIO_0] = { .name = "drop_green_prio_0", - .offset = 0x8A, + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, }, [OCELOT_STAT_DROP_GREEN_PRIO_1] = { .name = "drop_green_prio_1", - .offset = 0x8B, + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, }, [OCELOT_STAT_DROP_GREEN_PRIO_2] = { .name = "drop_green_prio_2", - .offset = 0x8C, + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, }, [OCELOT_STAT_DROP_GREEN_PRIO_3] = { .name = "drop_green_prio_3", - .offset = 0x8D, + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, }, [OCELOT_STAT_DROP_GREEN_PRIO_4] = { .name = "drop_green_prio_4", - .offset = 0x8E, + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, }, [OCELOT_STAT_DROP_GREEN_PRIO_5] = { .name = "drop_green_prio_5", - .offset = 0x8F, + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, }, [OCELOT_STAT_DROP_GREEN_PRIO_6] = { .name = "drop_green_prio_6", - .offset = 0x90, + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, }, [OCELOT_STAT_DROP_GREEN_PRIO_7] = { .name = "drop_green_prio_7", - .offset = 0x91, + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, }, }; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 68991b021c56..306026e6aa11 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1881,9 +1881,8 @@ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG); list_for_each_entry(region, &ocelot->stats_regions, node) { - err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, - region->offset, region->buf, - region->count); + err = ocelot_bulk_read(ocelot, region->base, region->buf, + region->count); if (err) return err; @@ -1978,7 +1977,7 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (ocelot->stats_layout[i].name[0] == '\0') continue; - if (region && ocelot->stats_layout[i].offset == last + 1) { + if (region && ocelot->stats_layout[i].reg == last + 4) { region->count++; } else { region = devm_kzalloc(ocelot->dev, sizeof(*region), @@ -1986,12 +1985,12 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (!region) return -ENOMEM; - region->offset = ocelot->stats_layout[i].offset; + region->base = ocelot->stats_layout[i].reg; region->count = 1; list_add_tail(®ion->node, &ocelot->stats_regions); } - last = ocelot->stats_layout[i].offset; + last = ocelot->stats_layout[i].reg; } list_for_each_entry(region, &ocelot->stats_regions, node) { diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 9ff910560043..9c488953f541 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -99,375 +99,375 @@ static const struct reg_field ocelot_regfields[REGFIELD_MAX] = { static const struct ocelot_stat_layout ocelot_stats_layout[OCELOT_NUM_STATS] = { [OCELOT_STAT_RX_OCTETS] = { .name = "rx_octets", - .offset = 0x00, + .reg = SYS_COUNT_RX_OCTETS, }, [OCELOT_STAT_RX_UNICAST] = { .name = "rx_unicast", - .offset = 0x01, + .reg = SYS_COUNT_RX_UNICAST, }, [OCELOT_STAT_RX_MULTICAST] = { .name = "rx_multicast", - .offset = 0x02, + .reg = SYS_COUNT_RX_MULTICAST, }, [OCELOT_STAT_RX_BROADCAST] = { .name = "rx_broadcast", - .offset = 0x03, + .reg = SYS_COUNT_RX_BROADCAST, }, [OCELOT_STAT_RX_SHORTS] = { .name = "rx_shorts", - .offset = 0x04, + .reg = SYS_COUNT_RX_SHORTS, }, [OCELOT_STAT_RX_FRAGMENTS] = { .name = "rx_fragments", - .offset = 0x05, + .reg = SYS_COUNT_RX_FRAGMENTS, }, [OCELOT_STAT_RX_JABBERS] = { .name = "rx_jabbers", - .offset = 0x06, + .reg = SYS_COUNT_RX_JABBERS, }, [OCELOT_STAT_RX_CRC_ALIGN_ERRS] = { .name = "rx_crc_align_errs", - .offset = 0x07, + .reg = SYS_COUNT_RX_CRC_ALIGN_ERRS, }, [OCELOT_STAT_RX_SYM_ERRS] = { .name = "rx_sym_errs", - .offset = 0x08, + .reg = SYS_COUNT_RX_SYM_ERRS, }, [OCELOT_STAT_RX_64] = { .name = "rx_frames_below_65_octets", - .offset = 0x09, + .reg = SYS_COUNT_RX_64, }, [OCELOT_STAT_RX_65_127] = { .name = "rx_frames_65_to_127_octets", - .offset = 0x0A, + .reg = SYS_COUNT_RX_65_127, }, [OCELOT_STAT_RX_128_255] = { .name = "rx_frames_128_to_255_octets", - .offset = 0x0B, + .reg = SYS_COUNT_RX_128_255, }, [OCELOT_STAT_RX_256_511] = { .name = "rx_frames_256_to_511_octets", - .offset = 0x0C, + .reg = SYS_COUNT_RX_256_511, }, [OCELOT_STAT_RX_512_1023] = { .name = "rx_frames_512_to_1023_octets", - .offset = 0x0D, + .reg = SYS_COUNT_RX_512_1023, }, [OCELOT_STAT_RX_1024_1526] = { .name = "rx_frames_1024_to_1526_octets", - .offset = 0x0E, + .reg = SYS_COUNT_RX_1024_1526, }, [OCELOT_STAT_RX_1527_MAX] = { .name = "rx_frames_over_1526_octets", - .offset = 0x0F, + .reg = SYS_COUNT_RX_1527_MAX, }, [OCELOT_STAT_RX_PAUSE] = { .name = "rx_pause", - .offset = 0x10, + .reg = SYS_COUNT_RX_PAUSE, }, [OCELOT_STAT_RX_CONTROL] = { .name = "rx_control", - .offset = 0x11, + .reg = SYS_COUNT_RX_CONTROL, }, [OCELOT_STAT_RX_LONGS] = { .name = "rx_longs", - .offset = 0x12, + .reg = SYS_COUNT_RX_LONGS, }, [OCELOT_STAT_RX_CLASSIFIED_DROPS] = { .name = "rx_classified_drops", - .offset = 0x13, + .reg = SYS_COUNT_RX_CLASSIFIED_DROPS, }, [OCELOT_STAT_RX_RED_PRIO_0] = { .name = "rx_red_prio_0", - .offset = 0x14, + .reg = SYS_COUNT_RX_RED_PRIO_0, }, [OCELOT_STAT_RX_RED_PRIO_1] = { .name = "rx_red_prio_1", - .offset = 0x15, + .reg = SYS_COUNT_RX_RED_PRIO_1, }, [OCELOT_STAT_RX_RED_PRIO_2] = { .name = "rx_red_prio_2", - .offset = 0x16, + .reg = SYS_COUNT_RX_RED_PRIO_2, }, [OCELOT_STAT_RX_RED_PRIO_3] = { .name = "rx_red_prio_3", - .offset = 0x17, + .reg = SYS_COUNT_RX_RED_PRIO_3, }, [OCELOT_STAT_RX_RED_PRIO_4] = { .name = "rx_red_prio_4", - .offset = 0x18, + .reg = SYS_COUNT_RX_RED_PRIO_4, }, [OCELOT_STAT_RX_RED_PRIO_5] = { .name = "rx_red_prio_5", - .offset = 0x19, + .reg = SYS_COUNT_RX_RED_PRIO_5, }, [OCELOT_STAT_RX_RED_PRIO_6] = { .name = "rx_red_prio_6", - .offset = 0x1A, + .reg = SYS_COUNT_RX_RED_PRIO_6, }, [OCELOT_STAT_RX_RED_PRIO_7] = { .name = "rx_red_prio_7", - .offset = 0x1B, + .reg = SYS_COUNT_RX_RED_PRIO_7, }, [OCELOT_STAT_RX_YELLOW_PRIO_0] = { .name = "rx_yellow_prio_0", - .offset = 0x1C, + .reg = SYS_COUNT_RX_YELLOW_PRIO_0, }, [OCELOT_STAT_RX_YELLOW_PRIO_1] = { .name = "rx_yellow_prio_1", - .offset = 0x1D, + .reg = SYS_COUNT_RX_YELLOW_PRIO_1, }, [OCELOT_STAT_RX_YELLOW_PRIO_2] = { .name = "rx_yellow_prio_2", - .offset = 0x1E, + .reg = SYS_COUNT_RX_YELLOW_PRIO_2, }, [OCELOT_STAT_RX_YELLOW_PRIO_3] = { .name = "rx_yellow_prio_3", - .offset = 0x1F, + .reg = SYS_COUNT_RX_YELLOW_PRIO_3, }, [OCELOT_STAT_RX_YELLOW_PRIO_4] = { .name = "rx_yellow_prio_4", - .offset = 0x20, + .reg = SYS_COUNT_RX_YELLOW_PRIO_4, }, [OCELOT_STAT_RX_YELLOW_PRIO_5] = { .name = "rx_yellow_prio_5", - .offset = 0x21, + .reg = SYS_COUNT_RX_YELLOW_PRIO_5, }, [OCELOT_STAT_RX_YELLOW_PRIO_6] = { .name = "rx_yellow_prio_6", - .offset = 0x22, + .reg = SYS_COUNT_RX_YELLOW_PRIO_6, }, [OCELOT_STAT_RX_YELLOW_PRIO_7] = { .name = "rx_yellow_prio_7", - .offset = 0x23, + .reg = SYS_COUNT_RX_YELLOW_PRIO_7, }, [OCELOT_STAT_RX_GREEN_PRIO_0] = { .name = "rx_green_prio_0", - .offset = 0x24, + .reg = SYS_COUNT_RX_GREEN_PRIO_0, }, [OCELOT_STAT_RX_GREEN_PRIO_1] = { .name = "rx_green_prio_1", - .offset = 0x25, + .reg = SYS_COUNT_RX_GREEN_PRIO_1, }, [OCELOT_STAT_RX_GREEN_PRIO_2] = { .name = "rx_green_prio_2", - .offset = 0x26, + .reg = SYS_COUNT_RX_GREEN_PRIO_2, }, [OCELOT_STAT_RX_GREEN_PRIO_3] = { .name = "rx_green_prio_3", - .offset = 0x27, + .reg = SYS_COUNT_RX_GREEN_PRIO_3, }, [OCELOT_STAT_RX_GREEN_PRIO_4] = { .name = "rx_green_prio_4", - .offset = 0x28, + .reg = SYS_COUNT_RX_GREEN_PRIO_4, }, [OCELOT_STAT_RX_GREEN_PRIO_5] = { .name = "rx_green_prio_5", - .offset = 0x29, + .reg = SYS_COUNT_RX_GREEN_PRIO_5, }, [OCELOT_STAT_RX_GREEN_PRIO_6] = { .name = "rx_green_prio_6", - .offset = 0x2A, + .reg = SYS_COUNT_RX_GREEN_PRIO_6, }, [OCELOT_STAT_RX_GREEN_PRIO_7] = { .name = "rx_green_prio_7", - .offset = 0x2B, + .reg = SYS_COUNT_RX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_OCTETS] = { .name = "tx_octets", - .offset = 0x40, + .reg = SYS_COUNT_TX_OCTETS, }, [OCELOT_STAT_TX_UNICAST] = { .name = "tx_unicast", - .offset = 0x41, + .reg = SYS_COUNT_TX_UNICAST, }, [OCELOT_STAT_TX_MULTICAST] = { .name = "tx_multicast", - .offset = 0x42, + .reg = SYS_COUNT_TX_MULTICAST, }, [OCELOT_STAT_TX_BROADCAST] = { .name = "tx_broadcast", - .offset = 0x43, + .reg = SYS_COUNT_TX_BROADCAST, }, [OCELOT_STAT_TX_COLLISION] = { .name = "tx_collision", - .offset = 0x44, + .reg = SYS_COUNT_TX_COLLISION, }, [OCELOT_STAT_TX_DROPS] = { .name = "tx_drops", - .offset = 0x45, + .reg = SYS_COUNT_TX_DROPS, }, [OCELOT_STAT_TX_PAUSE] = { .name = "tx_pause", - .offset = 0x46, + .reg = SYS_COUNT_TX_PAUSE, }, [OCELOT_STAT_TX_64] = { .name = "tx_frames_below_65_octets", - .offset = 0x47, + .reg = SYS_COUNT_TX_64, }, [OCELOT_STAT_TX_65_127] = { .name = "tx_frames_65_to_127_octets", - .offset = 0x48, + .reg = SYS_COUNT_TX_65_127, }, [OCELOT_STAT_TX_128_255] = { .name = "tx_frames_128_255_octets", - .offset = 0x49, + .reg = SYS_COUNT_TX_128_255, }, [OCELOT_STAT_TX_256_511] = { .name = "tx_frames_256_511_octets", - .offset = 0x4A, + .reg = SYS_COUNT_TX_256_511, }, [OCELOT_STAT_TX_512_1023] = { .name = "tx_frames_512_1023_octets", - .offset = 0x4B, + .reg = SYS_COUNT_TX_512_1023, }, [OCELOT_STAT_TX_1024_1526] = { .name = "tx_frames_1024_1526_octets", - .offset = 0x4C, + .reg = SYS_COUNT_TX_1024_1526, }, [OCELOT_STAT_TX_1527_MAX] = { .name = "tx_frames_over_1526_octets", - .offset = 0x4D, + .reg = SYS_COUNT_TX_1527_MAX, }, [OCELOT_STAT_TX_YELLOW_PRIO_0] = { .name = "tx_yellow_prio_0", - .offset = 0x4E, + .reg = SYS_COUNT_TX_YELLOW_PRIO_0, }, [OCELOT_STAT_TX_YELLOW_PRIO_1] = { .name = "tx_yellow_prio_1", - .offset = 0x4F, + .reg = SYS_COUNT_TX_YELLOW_PRIO_1, }, [OCELOT_STAT_TX_YELLOW_PRIO_2] = { .name = "tx_yellow_prio_2", - .offset = 0x50, + .reg = SYS_COUNT_TX_YELLOW_PRIO_2, }, [OCELOT_STAT_TX_YELLOW_PRIO_3] = { .name = "tx_yellow_prio_3", - .offset = 0x51, + .reg = SYS_COUNT_TX_YELLOW_PRIO_3, }, [OCELOT_STAT_TX_YELLOW_PRIO_4] = { .name = "tx_yellow_prio_4", - .offset = 0x52, + .reg = SYS_COUNT_TX_YELLOW_PRIO_4, }, [OCELOT_STAT_TX_YELLOW_PRIO_5] = { .name = "tx_yellow_prio_5", - .offset = 0x53, + .reg = SYS_COUNT_TX_YELLOW_PRIO_5, }, [OCELOT_STAT_TX_YELLOW_PRIO_6] = { .name = "tx_yellow_prio_6", - .offset = 0x54, + .reg = SYS_COUNT_TX_YELLOW_PRIO_6, }, [OCELOT_STAT_TX_YELLOW_PRIO_7] = { .name = "tx_yellow_prio_7", - .offset = 0x55, + .reg = SYS_COUNT_TX_YELLOW_PRIO_7, }, [OCELOT_STAT_TX_GREEN_PRIO_0] = { .name = "tx_green_prio_0", - .offset = 0x56, + .reg = SYS_COUNT_TX_GREEN_PRIO_0, }, [OCELOT_STAT_TX_GREEN_PRIO_1] = { .name = "tx_green_prio_1", - .offset = 0x57, + .reg = SYS_COUNT_TX_GREEN_PRIO_1, }, [OCELOT_STAT_TX_GREEN_PRIO_2] = { .name = "tx_green_prio_2", - .offset = 0x58, + .reg = SYS_COUNT_TX_GREEN_PRIO_2, }, [OCELOT_STAT_TX_GREEN_PRIO_3] = { .name = "tx_green_prio_3", - .offset = 0x59, + .reg = SYS_COUNT_TX_GREEN_PRIO_3, }, [OCELOT_STAT_TX_GREEN_PRIO_4] = { .name = "tx_green_prio_4", - .offset = 0x5A, + .reg = SYS_COUNT_TX_GREEN_PRIO_4, }, [OCELOT_STAT_TX_GREEN_PRIO_5] = { .name = "tx_green_prio_5", - .offset = 0x5B, + .reg = SYS_COUNT_TX_GREEN_PRIO_5, }, [OCELOT_STAT_TX_GREEN_PRIO_6] = { .name = "tx_green_prio_6", - .offset = 0x5C, + .reg = SYS_COUNT_TX_GREEN_PRIO_6, }, [OCELOT_STAT_TX_GREEN_PRIO_7] = { .name = "tx_green_prio_7", - .offset = 0x5D, + .reg = SYS_COUNT_TX_GREEN_PRIO_7, }, [OCELOT_STAT_TX_AGED] = { .name = "tx_aged", - .offset = 0x5E, + .reg = SYS_COUNT_TX_AGING, }, [OCELOT_STAT_DROP_LOCAL] = { .name = "drop_local", - .offset = 0x80, + .reg = SYS_COUNT_DROP_LOCAL, }, [OCELOT_STAT_DROP_TAIL] = { .name = "drop_tail", - .offset = 0x81, + .reg = SYS_COUNT_DROP_TAIL, }, [OCELOT_STAT_DROP_YELLOW_PRIO_0] = { .name = "drop_yellow_prio_0", - .offset = 0x82, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_0, }, [OCELOT_STAT_DROP_YELLOW_PRIO_1] = { .name = "drop_yellow_prio_1", - .offset = 0x83, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_1, }, [OCELOT_STAT_DROP_YELLOW_PRIO_2] = { .name = "drop_yellow_prio_2", - .offset = 0x84, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_2, }, [OCELOT_STAT_DROP_YELLOW_PRIO_3] = { .name = "drop_yellow_prio_3", - .offset = 0x85, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_3, }, [OCELOT_STAT_DROP_YELLOW_PRIO_4] = { .name = "drop_yellow_prio_4", - .offset = 0x86, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_4, }, [OCELOT_STAT_DROP_YELLOW_PRIO_5] = { .name = "drop_yellow_prio_5", - .offset = 0x87, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_5, }, [OCELOT_STAT_DROP_YELLOW_PRIO_6] = { .name = "drop_yellow_prio_6", - .offset = 0x88, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_6, }, [OCELOT_STAT_DROP_YELLOW_PRIO_7] = { .name = "drop_yellow_prio_7", - .offset = 0x89, + .reg = SYS_COUNT_DROP_YELLOW_PRIO_7, }, [OCELOT_STAT_DROP_GREEN_PRIO_0] = { .name = "drop_green_prio_0", - .offset = 0x8A, + .reg = SYS_COUNT_DROP_GREEN_PRIO_0, }, [OCELOT_STAT_DROP_GREEN_PRIO_1] = { .name = "drop_green_prio_1", - .offset = 0x8B, + .reg = SYS_COUNT_DROP_GREEN_PRIO_1, }, [OCELOT_STAT_DROP_GREEN_PRIO_2] = { .name = "drop_green_prio_2", - .offset = 0x8C, + .reg = SYS_COUNT_DROP_GREEN_PRIO_2, }, [OCELOT_STAT_DROP_GREEN_PRIO_3] = { .name = "drop_green_prio_3", - .offset = 0x8D, + .reg = SYS_COUNT_DROP_GREEN_PRIO_3, }, [OCELOT_STAT_DROP_GREEN_PRIO_4] = { .name = "drop_green_prio_4", - .offset = 0x8E, + .reg = SYS_COUNT_DROP_GREEN_PRIO_4, }, [OCELOT_STAT_DROP_GREEN_PRIO_5] = { .name = "drop_green_prio_5", - .offset = 0x8F, + .reg = SYS_COUNT_DROP_GREEN_PRIO_5, }, [OCELOT_STAT_DROP_GREEN_PRIO_6] = { .name = "drop_green_prio_6", - .offset = 0x90, + .reg = SYS_COUNT_DROP_GREEN_PRIO_6, }, [OCELOT_STAT_DROP_GREEN_PRIO_7] = { .name = "drop_green_prio_7", - .offset = 0x91, + .reg = SYS_COUNT_DROP_GREEN_PRIO_7, }, }; diff --git a/drivers/net/ethernet/mscc/vsc7514_regs.c b/drivers/net/ethernet/mscc/vsc7514_regs.c index 8ff935f7f150..9cf82ecf191c 100644 --- a/drivers/net/ethernet/mscc/vsc7514_regs.c +++ b/drivers/net/ethernet/mscc/vsc7514_regs.c @@ -188,6 +188,30 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_RX_CONTROL, 0x000044), REG(SYS_COUNT_RX_LONGS, 0x000048), REG(SYS_COUNT_RX_CLASSIFIED_DROPS, 0x00004c), + REG(SYS_COUNT_RX_RED_PRIO_0, 0x000050), + REG(SYS_COUNT_RX_RED_PRIO_1, 0x000054), + REG(SYS_COUNT_RX_RED_PRIO_2, 0x000058), + REG(SYS_COUNT_RX_RED_PRIO_3, 0x00005c), + REG(SYS_COUNT_RX_RED_PRIO_4, 0x000060), + REG(SYS_COUNT_RX_RED_PRIO_5, 0x000064), + REG(SYS_COUNT_RX_RED_PRIO_6, 0x000068), + REG(SYS_COUNT_RX_RED_PRIO_7, 0x00006c), + REG(SYS_COUNT_RX_YELLOW_PRIO_0, 0x000070), + REG(SYS_COUNT_RX_YELLOW_PRIO_1, 0x000074), + REG(SYS_COUNT_RX_YELLOW_PRIO_2, 0x000078), + REG(SYS_COUNT_RX_YELLOW_PRIO_3, 0x00007c), + REG(SYS_COUNT_RX_YELLOW_PRIO_4, 0x000080), + REG(SYS_COUNT_RX_YELLOW_PRIO_5, 0x000084), + REG(SYS_COUNT_RX_YELLOW_PRIO_6, 0x000088), + REG(SYS_COUNT_RX_YELLOW_PRIO_7, 0x00008c), + REG(SYS_COUNT_RX_GREEN_PRIO_0, 0x000090), + REG(SYS_COUNT_RX_GREEN_PRIO_1, 0x000094), + REG(SYS_COUNT_RX_GREEN_PRIO_2, 0x000098), + REG(SYS_COUNT_RX_GREEN_PRIO_3, 0x00009c), + REG(SYS_COUNT_RX_GREEN_PRIO_4, 0x0000a0), + REG(SYS_COUNT_RX_GREEN_PRIO_5, 0x0000a4), + REG(SYS_COUNT_RX_GREEN_PRIO_6, 0x0000a8), + REG(SYS_COUNT_RX_GREEN_PRIO_7, 0x0000ac), REG(SYS_COUNT_TX_OCTETS, 0x000100), REG(SYS_COUNT_TX_UNICAST, 0x000104), REG(SYS_COUNT_TX_MULTICAST, 0x000108), @@ -202,7 +226,41 @@ const u32 vsc7514_sys_regmap[] = { REG(SYS_COUNT_TX_512_1023, 0x00012c), REG(SYS_COUNT_TX_1024_1526, 0x000130), REG(SYS_COUNT_TX_1527_MAX, 0x000134), + REG(SYS_COUNT_TX_YELLOW_PRIO_0, 0x000138), + REG(SYS_COUNT_TX_YELLOW_PRIO_1, 0x00013c), + REG(SYS_COUNT_TX_YELLOW_PRIO_2, 0x000140), + REG(SYS_COUNT_TX_YELLOW_PRIO_3, 0x000144), + REG(SYS_COUNT_TX_YELLOW_PRIO_4, 0x000148), + REG(SYS_COUNT_TX_YELLOW_PRIO_5, 0x00014c), + REG(SYS_COUNT_TX_YELLOW_PRIO_6, 0x000150), + REG(SYS_COUNT_TX_YELLOW_PRIO_7, 0x000154), + REG(SYS_COUNT_TX_GREEN_PRIO_0, 0x000158), + REG(SYS_COUNT_TX_GREEN_PRIO_1, 0x00015c), + REG(SYS_COUNT_TX_GREEN_PRIO_2, 0x000160), + REG(SYS_COUNT_TX_GREEN_PRIO_3, 0x000164), + REG(SYS_COUNT_TX_GREEN_PRIO_4, 0x000168), + REG(SYS_COUNT_TX_GREEN_PRIO_5, 0x00016c), + REG(SYS_COUNT_TX_GREEN_PRIO_6, 0x000170), + REG(SYS_COUNT_TX_GREEN_PRIO_7, 0x000174), REG(SYS_COUNT_TX_AGING, 0x000178), + REG(SYS_COUNT_DROP_LOCAL, 0x000200), + REG(SYS_COUNT_DROP_TAIL, 0x000204), + REG(SYS_COUNT_DROP_YELLOW_PRIO_0, 0x000208), + REG(SYS_COUNT_DROP_YELLOW_PRIO_1, 0x00020c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_2, 0x000210), + REG(SYS_COUNT_DROP_YELLOW_PRIO_3, 0x000214), + REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218), + REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c), + REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220), + REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000214), + REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000218), + REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00021c), + REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000220), + REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000224), + REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000228), + REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00022c), + REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000230), + REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000234), REG(SYS_RESET_CFG, 0x000508), REG(SYS_CMID, 0x00050c), REG(SYS_VLAN_ETYPE_CFG, 0x000510), diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 2428bc64cb1d..2edea901bbd5 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -338,6 +338,30 @@ enum ocelot_reg { SYS_COUNT_RX_CONTROL, SYS_COUNT_RX_LONGS, SYS_COUNT_RX_CLASSIFIED_DROPS, + SYS_COUNT_RX_RED_PRIO_0, + SYS_COUNT_RX_RED_PRIO_1, + SYS_COUNT_RX_RED_PRIO_2, + SYS_COUNT_RX_RED_PRIO_3, + SYS_COUNT_RX_RED_PRIO_4, + SYS_COUNT_RX_RED_PRIO_5, + SYS_COUNT_RX_RED_PRIO_6, + SYS_COUNT_RX_RED_PRIO_7, + SYS_COUNT_RX_YELLOW_PRIO_0, + SYS_COUNT_RX_YELLOW_PRIO_1, + SYS_COUNT_RX_YELLOW_PRIO_2, + SYS_COUNT_RX_YELLOW_PRIO_3, + SYS_COUNT_RX_YELLOW_PRIO_4, + SYS_COUNT_RX_YELLOW_PRIO_5, + SYS_COUNT_RX_YELLOW_PRIO_6, + SYS_COUNT_RX_YELLOW_PRIO_7, + SYS_COUNT_RX_GREEN_PRIO_0, + SYS_COUNT_RX_GREEN_PRIO_1, + SYS_COUNT_RX_GREEN_PRIO_2, + SYS_COUNT_RX_GREEN_PRIO_3, + SYS_COUNT_RX_GREEN_PRIO_4, + SYS_COUNT_RX_GREEN_PRIO_5, + SYS_COUNT_RX_GREEN_PRIO_6, + SYS_COUNT_RX_GREEN_PRIO_7, SYS_COUNT_TX_OCTETS, SYS_COUNT_TX_UNICAST, SYS_COUNT_TX_MULTICAST, @@ -352,7 +376,41 @@ enum ocelot_reg { SYS_COUNT_TX_512_1023, SYS_COUNT_TX_1024_1526, SYS_COUNT_TX_1527_MAX, + SYS_COUNT_TX_YELLOW_PRIO_0, + SYS_COUNT_TX_YELLOW_PRIO_1, + SYS_COUNT_TX_YELLOW_PRIO_2, + SYS_COUNT_TX_YELLOW_PRIO_3, + SYS_COUNT_TX_YELLOW_PRIO_4, + SYS_COUNT_TX_YELLOW_PRIO_5, + SYS_COUNT_TX_YELLOW_PRIO_6, + SYS_COUNT_TX_YELLOW_PRIO_7, + SYS_COUNT_TX_GREEN_PRIO_0, + SYS_COUNT_TX_GREEN_PRIO_1, + SYS_COUNT_TX_GREEN_PRIO_2, + SYS_COUNT_TX_GREEN_PRIO_3, + SYS_COUNT_TX_GREEN_PRIO_4, + SYS_COUNT_TX_GREEN_PRIO_5, + SYS_COUNT_TX_GREEN_PRIO_6, + SYS_COUNT_TX_GREEN_PRIO_7, SYS_COUNT_TX_AGING, + SYS_COUNT_DROP_LOCAL, + SYS_COUNT_DROP_TAIL, + SYS_COUNT_DROP_YELLOW_PRIO_0, + SYS_COUNT_DROP_YELLOW_PRIO_1, + SYS_COUNT_DROP_YELLOW_PRIO_2, + SYS_COUNT_DROP_YELLOW_PRIO_3, + SYS_COUNT_DROP_YELLOW_PRIO_4, + SYS_COUNT_DROP_YELLOW_PRIO_5, + SYS_COUNT_DROP_YELLOW_PRIO_6, + SYS_COUNT_DROP_YELLOW_PRIO_7, + SYS_COUNT_DROP_GREEN_PRIO_0, + SYS_COUNT_DROP_GREEN_PRIO_1, + SYS_COUNT_DROP_GREEN_PRIO_2, + SYS_COUNT_DROP_GREEN_PRIO_3, + SYS_COUNT_DROP_GREEN_PRIO_4, + SYS_COUNT_DROP_GREEN_PRIO_5, + SYS_COUNT_DROP_GREEN_PRIO_6, + SYS_COUNT_DROP_GREEN_PRIO_7, SYS_RESET_CFG, SYS_SR_ETYPE_CFG, SYS_VLAN_ETYPE_CFG, @@ -633,13 +691,13 @@ enum ocelot_stat { }; struct ocelot_stat_layout { - u32 offset; + u32 reg; char name[ETH_GSTRING_LEN]; }; struct ocelot_stats_region { struct list_head node; - u32 offset; + u32 base; int count; u32 *buf; }; @@ -877,8 +935,8 @@ struct ocelot_policer { u32 burst; /* bytes */ }; -#define ocelot_bulk_read_rix(ocelot, reg, ri, buf, count) \ - __ocelot_bulk_read_ix(ocelot, reg, reg##_RSZ * (ri), buf, count) +#define ocelot_bulk_read(ocelot, reg, buf, count) \ + __ocelot_bulk_read_ix(ocelot, reg, 0, buf, count) #define ocelot_read_ix(ocelot, reg, gi, ri) \ __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) -- cgit v1.2.3 From a8239f0342bae5a51acca967ba95b9a8ad56dd62 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 18 Aug 2022 14:35:55 +0800 Subject: blk-mq: remove unused function blk_mq_queue_stopped() blk_mq_queue_stopped() doesn't have any caller, which was found by code coverage test, thus remove it. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20220818063555.3741222-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 20 -------------------- include/linux/blk-mq.h | 1 - 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5ee62b95f3e5..5568c7d09114 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2229,26 +2229,6 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); -/** - * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped - * @q: request queue. - * - * The caller is responsible for serializing this function against - * blk_mq_{start,stop}_hw_queue(). - */ -bool blk_mq_queue_stopped(struct request_queue *q) -{ - struct blk_mq_hw_ctx *hctx; - unsigned long i; - - queue_for_each_hw_ctx(q, hctx, i) - if (blk_mq_hctx_stopped(hctx)) - return true; - - return false; -} -EXPORT_SYMBOL(blk_mq_queue_stopped); - /* * This function is often used for pausing .queue_rq() by driver when * there isn't enough resource or some conditions aren't satisfied, and diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index effee1dc715a..92294a5fb083 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,7 +857,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); -bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3 From b5a5b9d5f28d23b84f06b45c61dcad95b07d41bc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Aug 2022 15:38:58 +0200 Subject: serial: document start_rx member at struct uart_ops Fix this doc build warning: ./include/linux/serial_core.h:397: warning: Function parameter or member 'start_rx' not described in 'uart_ops' Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5d07ae2eec8fbad87e623160f9926b178bef2744.1660829433.git.mchehab@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aef3145f2032..6e4f4765d209 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -141,6 +141,14 @@ struct gpio_desc; * Locking: none. * Interrupts: caller dependent. * + * @start_rx: ``void ()(struct uart_port *port)`` + * + * Start receiving characters. + * + * Locking: @port->lock taken. + * Interrupts: locally disabled. + * This call must not sleep + * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. -- cgit v1.2.3 From 7ec9fce4b31604f8415136a4c07f7dc8ad431aec Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 18 Aug 2022 10:41:18 +0300 Subject: ip_tunnel: Respect tunnel key's "flow_flags" in IP tunnels Commit 451ef36bd229 ("ip_tunnels: Add new flow flags field to ip_tunnel_key") added a "flow_flags" member to struct ip_tunnel_key which was later used by the commit in the fixes tag to avoid dropping packets with sources that aren't locally configured when set in bpf_set_tunnel_key(). VXLAN and GENEVE were made to respect this flag, ip tunnels like IPIP and GRE were not. This commit fixes this omission by making ip_tunnel_init_flow() receive the flow flags from the tunnel key in the relevant collect_md paths. Fixes: b8fff748521c ("bpf: Set flow flag to allow any source IP in bpf_tunnel_key") Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann Reviewed-by: Paul Chaignon Link: https://lore.kernel.org/bpf/20220818074118.726639-1-eyal.birger@gmail.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 3 ++- include/net/ip_tunnels.h | 4 +++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 7 ++++--- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 39904dacf4f0..b3472fb94617 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,7 +423,8 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0); + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, + 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 63fac94f9ace..ced80e2f8b58 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -246,7 +246,8 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, - __u32 mark, __u32 tun_inner_hash) + __u32 mark, __u32 tun_inner_hash, + __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); @@ -263,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; + fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c58e21f724e..f866d6282b2b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -609,7 +609,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos & ~INET_ECN_MASK, dev_net(dev), 0, - skb->mark, skb_get_hash(skb)); + skb->mark, skb_get_hash(skb), key->flow_flags); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e65e948cab9f..019f3b0839c5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -295,7 +295,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), dev_net(dev), - tunnel->parms.link, tunnel->fwmark, 0); + tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -570,7 +570,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - dev_net(dev), 0, skb->mark, skb_get_hash(skb)); + dev_net(dev), 0, skb->mark, skb_get_hash(skb), + key->flow_flags); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -729,7 +730,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), dev_net(dev), tunnel->parms.link, - tunnel->fwmark, skb_get_hash(skb)); + tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; -- cgit v1.2.3 From c1e5c2f0cb8a22ec2e14af92afc7006491bebabb Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Thu, 4 Aug 2022 11:48:03 +0800 Subject: usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles Fix incorrect pin assignment values when connecting to a monitor with Type-C receptacle instead of a plug. According to specification, an UFP_D receptacle's pin assignment should came from the UFP_D pin assignments field (bit 23:16), while an UFP_D plug's assignments are described in the DFP_D pin assignments (bit 15:8) during Mode Discovery. For example the LG 27 UL850-W is a monitor with Type-C receptacle. The monitor responds to MODE DISCOVERY command with following DisplayPort Capability flag: dp->alt->vdo=0x140045 The existing logic only take cares of UPF_D plug case, and would take the bit 15:8 for this 0x140045 case. This results in an non-existing pin assignment 0x0 in dp_altmode_configure. To fix this problem a new set of macros are introduced to take plug/receptacle differences into consideration. Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode") Cc: stable@vger.kernel.org Co-developed-by: Pablo Sun Co-developed-by: Macpaul Lin Reviewed-by: Guillaume Ranquet Reviewed-by: Heikki Krogerus Signed-off-by: Pablo Sun Signed-off-by: Macpaul Lin Link: https://lore.kernel.org/r/20220804034803.19486-1-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 4 ++-- include/linux/usb/typec_dp.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index c1d8c23baa39..de66a2949e33 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -99,8 +99,8 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) case DP_STATUS_CON_UFP_D: case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */ conf |= DP_CONF_UFP_U_AS_UFP_D; - pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) & - DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo); + pin_assign = DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo) & + DP_CAP_PIN_ASSIGN_DFP_D(dp->port->vdo); break; default: break; diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index cfb916cccd31..8d09c2f0a9b8 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -73,6 +73,11 @@ enum { #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +/* Get pin assignment taking plug & receptacle into consideration */ +#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ + DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) -- cgit v1.2.3 From bdd1c37a315bc50ab14066c4852bc8dcf070451e Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 16 Aug 2022 20:53:21 +0800 Subject: KVM: Rename KVM_PRIVATE_MEM_SLOTS to KVM_INTERNAL_MEM_SLOTS KVM_INTERNAL_MEM_SLOTS better reflects the fact those slots are KVM internally used (invisible to userspace) and avoids confusion to future private slots that can have different meaning. Signed-off-by: Chao Peng Message-Id: <20220816125322.1110439-2-chao.p.peng@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- include/linux/kvm_host.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ffa578cafe1..2c96c43c313a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -53,7 +53,7 @@ #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 3 +#define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c480b1821e1..fd5c3b4715f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -656,12 +656,12 @@ struct kvm_irq_routing_table { }; #endif -#ifndef KVM_PRIVATE_MEM_SLOTS -#define KVM_PRIVATE_MEM_SLOTS 0 +#ifndef KVM_INTERNAL_MEM_SLOTS +#define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX -#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS) +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 20ec3ebd707c77fb9b11b37193449193d4649f33 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 16 Aug 2022 20:53:22 +0800 Subject: KVM: Rename mmu_notifier_* to mmu_invalidate_* The motivation of this renaming is to make these variables and related helper functions less mmu_notifier bound and can also be used for non mmu_notifier based page invalidation. mmu_invalidate_* was chosen to better describe the purpose of 'invalidating' a page that those variables are used for. - mmu_notifier_seq/range_start/range_end are renamed to mmu_invalidate_seq/range_start/range_end. - mmu_notifier_retry{_hva} helper functions are renamed to mmu_invalidate_retry{_hva}. - mmu_notifier_count is renamed to mmu_invalidate_in_progress to avoid confusion with mn_active_invalidate_count. - While here, also update kvm_inc/dec_notifier_count() to kvm_mmu_invalidate_begin/end() to match the change for mmu_notifier_count. No functional change intended. Signed-off-by: Chao Peng Message-Id: <20220816125322.1110439-3-chao.p.peng@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 8 ++--- arch/mips/kvm/mmu.c | 12 +++---- arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 4 +-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 +-- arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 ++-- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 8 ++--- arch/powerpc/kvm/e500_mmu_host.c | 4 +-- arch/riscv/kvm/mmu.c | 4 +-- arch/x86/kvm/mmu/mmu.c | 14 ++++---- arch/x86/kvm/mmu/paging_tmpl.h | 4 +-- include/linux/kvm_host.h | 60 +++++++++++++++++--------------- virt/kvm/kvm_main.c | 52 ++++++++++++++------------- virt/kvm/pfncache.c | 17 ++++----- 15 files changed, 103 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 87f1cd0df36e..c9a13e487187 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -993,7 +993,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, * THP doesn't start to split while we are adjusting the * refcounts. * - * We are sure this doesn't happen, because mmu_notifier_retry + * We are sure this doesn't happen, because mmu_invalidate_retry * was successful and we are holding the mmu_lock, so if this * THP is trying to split, it will be blocked in the mmu * notifier before touching any of the pages, specifically @@ -1188,9 +1188,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return ret; } - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq happens before we call + * Ensure the read of mmu_invalidate_seq happens before we call * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk * the page we just got a reference to gets unmapped before we have a * chance to grab the mmu_lock, which ensure that if the page gets @@ -1246,7 +1246,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index db17e870bdff..74cd64a24d05 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -615,17 +615,17 @@ retry: * Used to check for invalidations in progress, of the pfn that is * returned by pfn_to_pfn_prot below. */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; /* - * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in - * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads + * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't * risk the page we get a reference to getting unmapped before we have a - * chance to grab the mmu_lock without mmu_notifier_retry() noticing. + * chance to grab the mmu_lock without mmu_invalidate_retry() noticing. * * This smp_rmb() pairs with the effective smp_wmb() of the combination * of the pte_unmap_unlock() after the PTE is zapped, and the * spin_lock() in kvm_mmu_notifier_invalidate_() before - * mmu_notifier_seq is incremented. + * mmu_invalidate_seq is incremented. */ smp_rmb(); @@ -638,7 +638,7 @@ retry: spin_lock(&kvm->mmu_lock); /* Check if an invalidation has taken place since we got pfn */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 4def2bd17b9b..d49065af08e9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -666,7 +666,7 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq, VM_WARN(!spin_is_locked(&kvm->mmu_lock), "%s called with kvm mmu_lock not held \n", __func__); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) return NULL; pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift); diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 1ae09992c9ea..bc6a381b5346 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -90,7 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, unsigned long pfn; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Get host physical address for gpa */ @@ -151,7 +151,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, cpte = kvmppc_mmu_hpte_cache_next(vcpu); spin_lock(&kvm->mmu_lock); - if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { + if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) { r = -EAGAIN; goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 514fd45c1994..e9744b41a226 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -578,7 +578,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, return -EFAULT; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); ret = -EFAULT; @@ -693,7 +693,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, /* Check if we might have been invalidated; let the guest retry if so */ ret = RESUME_GUEST; - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { unlock_rmap(rmap); goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 9d4b3feda3b6..5d5e12f3bf86 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -640,7 +640,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Check if we might have been invalidated; let the guest retry if so */ spin_lock(&kvm->mmu_lock); ret = -EAGAIN; - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; /* Now traverse again under the lock and change the tree */ @@ -830,7 +830,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, bool large_enable; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -1191,7 +1191,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, * Increase the mmu notifier sequence number to prevent any page * fault that read the memslot earlier from writing a PTE. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index be8249cc6107..5a64a1341e6f 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1580,7 +1580,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, /* 2. Find the host pte for this L1 guest real address */ /* Used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* See if can find translation in our partition scoped tables for L1 */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2257fb18cb72..5a05953ae13f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -219,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, g_ptel = ptel; /* used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* Find the memslot (if any) for this address */ @@ -366,7 +366,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -932,7 +932,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, int i; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); @@ -960,7 +960,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, long ret = H_SUCCESS; /* Used later to detect if we might have been invalidated */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 7f16afc331ef..05668e964140 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -339,7 +339,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long flags; /* used to check for invalidations in progress */ - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); /* @@ -460,7 +460,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_invalidate_retry(kvm, mmu_seq)) { ret = -EAGAIN; goto out; } diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 3a35b2d95697..3620ecac2fa1 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -666,7 +666,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return ret; } - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); if (hfn == KVM_PFN_ERR_HWPOISON) { @@ -686,7 +686,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); - if (mmu_notifier_retry(kvm, mmu_seq)) + if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; if (writable) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index eccddb136954..126fa9aec64c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2914,7 +2914,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -2928,7 +2928,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) * * There are several ways to safely use this helper: * - * - Check mmu_notifier_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -3056,7 +3056,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault return; /* - * mmu_notifier_retry() was successful and mmu_lock is held, so + * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. */ fault->goal_level = fault->req_level; @@ -4203,7 +4203,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -4227,7 +4227,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); @@ -6055,7 +6055,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6069,7 +6069,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); write_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f5958071220c..39e0205e7300 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -589,7 +589,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If addresses are being invalidated, skip prefetching to avoid * accidentally prefetching those addresses. */ - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(vcpu->kvm->mmu_invalidate_in_progress)) return; if (sp->role.direct) @@ -838,7 +838,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_notifier_seq; + mmu_seq = vcpu->kvm->mmu_invalidate_seq; smp_rmb(); r = kvm_faultin_pfn(vcpu, fault); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fd5c3b4715f8..f4519d3689e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -765,10 +765,10 @@ struct kvm { #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) struct mmu_notifier mmu_notifier; - unsigned long mmu_notifier_seq; - long mmu_notifier_count; - unsigned long mmu_notifier_range_start; - unsigned long mmu_notifier_range_end; + unsigned long mmu_invalidate_seq; + long mmu_invalidate_in_progress; + unsigned long mmu_invalidate_range_start; + unsigned long mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; @@ -1357,10 +1357,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end); +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end); +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -1907,42 +1907,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) +static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* - * Ensure the read of mmu_notifier_count happens before the read - * of mmu_notifier_seq. This interacts with the smp_wmb() in - * mmu_notifier_invalidate_range_end to make sure that the caller - * either sees the old (non-zero) value of mmu_notifier_count or - * the new (incremented) value of mmu_notifier_seq. - * PowerPC Book3s HV KVM calls this under a per-page lock - * rather than under kvm->mmu_lock, for scalability, so - * can't rely on kvm->mmu_lock to keep things ordered. + * Ensure the read of mmu_invalidate_in_progress happens before + * the read of mmu_invalidate_seq. This interacts with the + * smp_wmb() in mmu_notifier_invalidate_range_end to make sure + * that the caller either sees the old (non-zero) value of + * mmu_invalidate_in_progress or the new (incremented) value of + * mmu_invalidate_seq. + * + * PowerPC Book3s HV KVM calls this under a per-page lock rather + * than under kvm->mmu_lock, for scalability, so can't rely on + * kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } -static inline int mmu_notifier_retry_hva(struct kvm *kvm, - unsigned long mmu_seq, - unsigned long hva) +static inline int mmu_invalidate_retry_hva(struct kvm *kvm, + unsigned long mmu_seq, + unsigned long hva) { lockdep_assert_held(&kvm->mmu_lock); /* - * If mmu_notifier_count is non-zero, then the range maintained by - * kvm_mmu_notifier_invalidate_range_start contains all addresses that - * might be being invalidated. Note that it may include some false + * If mmu_invalidate_in_progress is non-zero, then the range maintained + * by kvm_mmu_notifier_invalidate_range_start contains all addresses + * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ - if (unlikely(kvm->mmu_notifier_count) && - hva >= kvm->mmu_notifier_range_start && - hva < kvm->mmu_notifier_range_end) + if (unlikely(kvm->mmu_invalidate_in_progress) && + hva >= kvm->mmu_invalidate_range_start && + hva < kvm->mmu_invalidate_range_end) return 1; - if (kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 44b92d773156..5142c054b03c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, /* * .change_pte() must be surrounded by .invalidate_range_{start,end}(). - * If mmu_notifier_count is zero, then no in-progress invalidations, - * including this one, found a relevant memslot at start(); rechecking - * memslots here is unnecessary. Note, a false positive (count elevated - * by a different invalidation) is sub-optimal but functionally ok. + * If mmu_invalidate_in_progress is zero, then no in-progress + * invalidations, including this one, found a relevant memslot at + * start(); rechecking memslots here is unnecessary. Note, a false + * positive (count elevated by a different invalidation) is sub-optimal + * but functionally ok. */ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - if (!READ_ONCE(kvm->mmu_notifier_count)) + if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); } -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * The count increase must become visible at unlock time as no * spte can be established without taking the mmu_lock and * count is also read inside the mmu_lock critical section. */ - kvm->mmu_notifier_count++; - if (likely(kvm->mmu_notifier_count == 1)) { - kvm->mmu_notifier_range_start = start; - kvm->mmu_notifier_range_end = end; + kvm->mmu_invalidate_in_progress++; + if (likely(kvm->mmu_invalidate_in_progress == 1)) { + kvm->mmu_invalidate_range_start = start; + kvm->mmu_invalidate_range_end = end; } else { /* * Fully tracking multiple concurrent ranges has diminishing @@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, * accumulate and persist until all outstanding invalidates * complete. */ - kvm->mmu_notifier_range_start = - min(kvm->mmu_notifier_range_start, start); - kvm->mmu_notifier_range_end = - max(kvm->mmu_notifier_range_end, end); + kvm->mmu_invalidate_range_start = + min(kvm->mmu_invalidate_range_start, start); + kvm->mmu_invalidate_range_end = + max(kvm->mmu_invalidate_range_end, end); } } @@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = kvm_unmap_gfn_range, - .on_lock = kvm_inc_notifier_count, + .on_lock = kvm_mmu_invalidate_begin, .on_unlock = kvm_arch_guest_memory_reclaimed, .flush_on_ret = true, .may_block = mmu_notifier_range_blockable(range), @@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, /* * Prevent memslot modification between range_start() and range_end() * so that conditionally locking provides the same result in both - * functions. Without that guarantee, the mmu_notifier_count + * functions. Without that guarantee, the mmu_invalidate_in_progress * adjustments will be imbalanced. * * Pairs with the decrement in range_end(). @@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * any given time, and the caches themselves can check for hva overlap, * i.e. don't need to rely on memslot overlap checks for performance. * Because this runs without holding mmu_lock, the pfn caches must use - * mn_active_invalidate_count (see above) instead of mmu_notifier_count. + * mn_active_invalidate_count (see above) instead of + * mmu_invalidate_in_progress. */ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, hva_range.may_block); @@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * This sequence increase will notify the kvm page fault that * the page that is going to be mapped in the spte could have * been freed. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; smp_wmb(); /* * The above sequence increase must be visible before the * below count decrease, which is ensured by the smp_wmb above - * in conjunction with the smp_rmb in mmu_notifier_retry(). + * in conjunction with the smp_rmb in mmu_invalidate_retry(). */ - kvm->mmu_notifier_count--; + kvm->mmu_invalidate_in_progress--; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = (void *)kvm_null_fn, - .on_lock = kvm_dec_notifier_count, + .on_lock = kvm_mmu_invalidate_end, .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = mmu_notifier_range_blockable(range), @@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, if (wake) rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); - BUG_ON(kvm->mmu_notifier_count < 0); + BUG_ON(kvm->mmu_invalidate_in_progress < 0); } static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index ab519f72f2cd..68ff41d39545 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -112,27 +112,28 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s { /* * mn_active_invalidate_count acts for all intents and purposes - * like mmu_notifier_count here; but the latter cannot be used - * here because the invalidation of caches in the mmu_notifier - * event occurs _before_ mmu_notifier_count is elevated. + * like mmu_invalidate_in_progress here; but the latter cannot + * be used here because the invalidation of caches in the + * mmu_notifier event occurs _before_ mmu_invalidate_in_progress + * is elevated. * * Note, it does not matter that mn_active_invalidate_count * is not protected by gpc->lock. It is guaranteed to * be elevated before the mmu_notifier acquires gpc->lock, and - * isn't dropped until after mmu_notifier_seq is updated. + * isn't dropped until after mmu_invalidate_seq is updated. */ if (kvm->mn_active_invalidate_count) return true; /* * Ensure mn_active_invalidate_count is read before - * mmu_notifier_seq. This pairs with the smp_wmb() in + * mmu_invalidate_seq. This pairs with the smp_wmb() in * mmu_notifier_invalidate_range_end() to guarantee either the * old (non-zero) value of mn_active_invalidate_count or the - * new (incremented) value of mmu_notifier_seq is observed. + * new (incremented) value of mmu_invalidate_seq is observed. */ smp_rmb(); - return kvm->mmu_notifier_seq != mmu_seq; + return kvm->mmu_invalidate_seq != mmu_seq; } static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) @@ -155,7 +156,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) gpc->valid = false; do { - mmu_seq = kvm->mmu_notifier_seq; + mmu_seq = kvm->mmu_invalidate_seq; smp_rmb(); write_unlock_irq(&gpc->lock); -- cgit v1.2.3 From a357f7b4583ebf81d19c95aef57497ae81c5f63c Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 17 Aug 2022 23:20:08 +0800 Subject: ata: libata: Set __ATA_BASE_SHT max_sectors Commit 0568e6122574 ("ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors") inadvertently capped the max_sectors value for some SATA disks to a value which is lower than we would want. For a device which supports LBA48, we would previously have request queue max_sectors_kb and max_hw_sectors_kb values of 1280 and 32767 respectively. For AHCI controllers, the value chosen for shost max sectors comes from the minimum of the SCSI host default max sectors in SCSI_DEFAULT_MAX_SECTORS (1024) and the shost DMA device mapping limit. This means that we would now set the max_sectors_kb and max_hw_sectors_kb values for a disk which supports LBA48 at 512, ignoring DMA mapping limit. As report by Oliver at [0], this caused a performance regression. Fix by picking a large enough max sectors value for ATA host controllers such that we don't needlessly reduce max_sectors_kb for LBA48 disks. [0] https://lore.kernel.org/linux-ide/YvsGbidf3na5FpGb@xsang-OptiPlex-9020/T/#m22d9fc5ad15af66066dd9fecf3d50f1b1ef11da3 Fixes: 0568e6122574 ("ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors") Reported-by: Oliver Sang Signed-off-by: John Garry Signed-off-by: Damien Le Moal --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0269ff114f5a..698032e5ef2d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1382,7 +1382,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .proc_name = drv_name, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ - .unlock_native_capacity = ata_scsi_unlock_native_capacity + .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ + .max_sectors = ATA_MAX_SECTORS_LBA48 #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ -- cgit v1.2.3 From 5535be3099717646781ce1540cf725965d680e7b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 9 Aug 2022 22:56:40 +0200 Subject: mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW Ever since the Dirty COW (CVE-2016-5195) security issue happened, we know that FOLL_FORCE can be possibly dangerous, especially if there are races that can be exploited by user space. Right now, it would be sufficient to have some code that sets a PTE of a R/O-mapped shared page dirty, in order for it to erroneously become writable by FOLL_FORCE. The implications of setting a write-protected PTE dirty might not be immediately obvious to everyone. And in fact ever since commit 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte"), we can use UFFDIO_CONTINUE to map a shmem page R/O while marking the pte dirty. This can be used by unprivileged user space to modify tmpfs/shmem file content even if the user does not have write permissions to the file, and to bypass memfd write sealing -- Dirty COW restricted to tmpfs/shmem (CVE-2022-2590). To fix such security issues for good, the insight is that we really only need that fancy retry logic (FOLL_COW) for COW mappings that are not writable (!VM_WRITE). And in a COW mapping, we really only broke COW if we have an exclusive anonymous page mapped. If we have something else mapped, or the mapped anonymous page might be shared (!PageAnonExclusive), we have to trigger a write fault to break COW. If we don't find an exclusive anonymous page when we retry, we have to trigger COW breaking once again because something intervened. Let's move away from this mandatory-retry + dirty handling and rely on our PageAnonExclusive() flag for making a similar decision, to use the same COW logic as in other kernel parts here as well. In case we stumble over a PTE in a COW mapping that does not map an exclusive anonymous page, COW was not properly broken and we have to trigger a fake write-fault to break COW. Just like we do in can_change_pte_writable() added via commit 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive anonymous pages when changing protection") and commit 76aefad628aa ("mm/mprotect: fix soft-dirty check in can_change_pte_writable()"), take care of softdirty and uffd-wp manually. For example, a write() via /proc/self/mem to a uffd-wp-protected range has to fail instead of silently granting write access and bypassing the userspace fault handler. Note that FOLL_FORCE is not only used for debug access, but also triggered by applications without debug intentions, for example, when pinning pages via RDMA. This fixes CVE-2022-2590. Note that only x86_64 and aarch64 are affected, because only those support CONFIG_HAVE_ARCH_USERFAULTFD_MINOR. Fortunately, FOLL_COW is no longer required to handle FOLL_FORCE. So let's just get rid of it. Thanks to Nadav Amit for pointing out that the pte_dirty() check in FOLL_FORCE code is problematic and might be exploitable. Note 1: We don't check for the PTE being dirty because it doesn't matter for making a "was COWed" decision anymore, and whoever modifies the page has to set the page dirty either way. Note 2: Kernels before extended uffd-wp support and before PageAnonExclusive (< 5.19) can simply revert the problematic commit instead and be safe regarding UFFDIO_CONTINUE. A backport to v5.19 requires minor adjustments due to lack of vma_soft_dirty_enabled(). Link: https://lkml.kernel.org/r/20220809205640.70916-1-david@redhat.com Fixes: 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte") Signed-off-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Axel Rasmussen Cc: Nadav Amit Cc: Peter Xu Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: John Hubbard Cc: Jason Gunthorpe Cc: David Laight Cc: [5.16] Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - mm/gup.c | 68 ++++++++++++++++++++++++++++++++++++------------------ mm/huge_memory.c | 64 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 89 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3bedc449c14d..982f2607180b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_COW 0x4000 /* internal GUP flag */ #define FOLL_ANON 0x8000 /* don't do file mappings */ #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ diff --git a/mm/gup.c b/mm/gup.c index 732825157430..5abdaf487460 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, return -EEXIST; } -/* - * FOLL_FORCE can write to even unwritable pte's, but only - * after we've gone through a COW cycle and they are dirty. - */ -static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) +/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */ +static inline bool can_follow_write_pte(pte_t pte, struct page *page, + struct vm_area_struct *vma, + unsigned int flags) { - return pte_write(pte) || - ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); + /* If the pte is writable, we can write to the page. */ + if (pte_write(pte)) + return true; + + /* Maybe FOLL_FORCE is set to override it? */ + if (!(flags & FOLL_FORCE)) + return false; + + /* But FOLL_FORCE has no effect on shared mappings */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return false; + + /* ... or read-only private ones */ + if (!(vma->vm_flags & VM_MAYWRITE)) + return false; + + /* ... or already writable ones that just need to take a write fault */ + if (vma->vm_flags & VM_WRITE) + return false; + + /* + * See can_change_pte_writable(): we broke COW and could map the page + * writable if we have an exclusive anonymous page ... + */ + if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + return false; + + /* ... and a write-fault isn't required for other reasons. */ + if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte)) + return false; + return !userfaultfd_pte_wp(vma, pte); } static struct page *follow_page_pte(struct vm_area_struct *vma, @@ -528,12 +556,19 @@ retry: } if ((flags & FOLL_NUMA) && pte_protnone(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { - pte_unmap_unlock(ptep, ptl); - return NULL; - } page = vm_normal_page(vma, address, pte); + + /* + * We only care about anon pages in can_follow_write_pte() and don't + * have to worry about pte_devmap() because they are never anon. + */ + if ((flags & FOLL_WRITE) && + !can_follow_write_pte(pte, page, vma, flags)) { + page = NULL; + goto out; + } + if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { /* * Only return device mapping pages in the FOLL_GET or FOLL_PIN @@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma, return -EBUSY; } - /* - * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when - * necessary, even if maybe_mkwrite decided not to set pte_write. We - * can thus safely do subsequent page lookups as if they were reads. - * But only do so when looping for pte_write is futile: in some cases - * userspace may also be wanting to write to the gotten user page, - * which a read fault here might prevent (a readonly page might get - * reCOWed by userspace write). - */ - if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags |= FOLL_COW; return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8a7c1b344abe..e9414ee57c5b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pmd_lockptr(mm, pmd)); - /* - * When we COW a devmap PMD entry, we split it into PTEs, so we should - * not be in this function with `flags & FOLL_COW` set. - */ - WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) @@ -1395,14 +1389,42 @@ fallback: return VM_FAULT_FALLBACK; } -/* - * FOLL_FORCE can write to even unwritable pmd's, but only - * after we've gone through a COW cycle and they are dirty. - */ -static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) +/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */ +static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, + struct vm_area_struct *vma, + unsigned int flags) { - return pmd_write(pmd) || - ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); + /* If the pmd is writable, we can write to the page. */ + if (pmd_write(pmd)) + return true; + + /* Maybe FOLL_FORCE is set to override it? */ + if (!(flags & FOLL_FORCE)) + return false; + + /* But FOLL_FORCE has no effect on shared mappings */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return false; + + /* ... or read-only private ones */ + if (!(vma->vm_flags & VM_MAYWRITE)) + return false; + + /* ... or already writable ones that just need to take a write fault */ + if (vma->vm_flags & VM_WRITE) + return false; + + /* + * See can_change_pte_writable(): we broke COW and could map the page + * writable if we have an exclusive anonymous page ... + */ + if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + return false; + + /* ... and a write-fault isn't required for other reasons. */ + if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd)) + return false; + return !userfaultfd_huge_pmd_wp(vma, pmd); } struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, @@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned int flags) { struct mm_struct *mm = vma->vm_mm; - struct page *page = NULL; + struct page *page; assert_spin_locked(pmd_lockptr(mm, pmd)); - if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) - goto out; + page = pmd_page(*pmd); + VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + + if ((flags & FOLL_WRITE) && + !can_follow_write_pmd(*pmd, page, vma, flags)) + return NULL; /* Avoid dumping huge zero page */ if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) @@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, /* Full NUMA hinting faults to serialise migration in fault paths */ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) - goto out; - - page = pmd_page(*pmd); - VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page); + return NULL; if (!pmd_write(*pmd) && gup_must_unshare(flags, page)) return ERR_PTR(-EMLINK); @@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page); -out: return page; } -- cgit v1.2.3 From a39c5d3ce03dd890ab6a9be44b21177cec32da55 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sun, 7 Aug 2022 15:44:42 +0000 Subject: mm: add DEVICE_ZONE to FOR_ALL_ZONES FOR_ALL_ZONES should be consistent with enum zone_type. Otherwise, __count_zid_vm_events have the potential to add count to wrong item when zid is ZONE_DEVICE. Link: https://lkml.kernel.org/r/20220807154442.GA18167@haolee.io Signed-off-by: Hao Lee Cc: David Hildenbrand Cc: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 15 +++++++++++---- mm/vmstat.c | 9 ++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 404024486fa5..f3fc36cd2276 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -20,12 +20,19 @@ #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE +#ifdef CONFIG_ZONE_DEVICE +#define DEVICE_ZONE(xx) xx##_DEVICE, +#else +#define DEVICE_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \ + HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, - FOR_ALL_ZONES(PGALLOC), - FOR_ALL_ZONES(ALLOCSTALL), - FOR_ALL_ZONES(PGSCAN_SKIP), + FOR_ALL_ZONES(PGALLOC) + FOR_ALL_ZONES(ALLOCSTALL) + FOR_ALL_ZONES(PGSCAN_SKIP) PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, diff --git a/mm/vmstat.c b/mm/vmstat.c index 373d2730fcf2..90af9a8572f5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1168,8 +1168,15 @@ int fragmentation_index(struct zone *zone, unsigned int order) #define TEXT_FOR_HIGHMEM(xx) #endif +#ifdef CONFIG_ZONE_DEVICE +#define TEXT_FOR_DEVICE(xx) xx "_device", +#else +#define TEXT_FOR_DEVICE(xx) +#endif + #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ - TEXT_FOR_HIGHMEM(xx) xx "_movable", + TEXT_FOR_HIGHMEM(xx) xx "_movable", \ + TEXT_FOR_DEVICE(xx) const char * const vmstat_text[] = { /* enum zone_stat_item counters */ -- cgit v1.2.3 From f369b07c861435bd812a9d14493f71b34132ed6f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 11 Aug 2022 16:13:40 -0400 Subject: mm/uffd: reset write protection when unregister with wp-mode The motivation of this patch comes from a recent report and patchfix from David Hildenbrand on hugetlb shared handling of wr-protected page [1]. With the reproducer provided in commit message of [1], one can leverage the uffd-wp lazy-reset of ptes to trigger a hugetlb issue which can affect not only the attacker process, but also the whole system. The lazy-reset mechanism of uffd-wp was used to make unregister faster, meanwhile it has an assumption that any leftover pgtable entries should only affect the process on its own, so not only the user should be aware of anything it does, but also it should not affect outside of the process. But it seems that this is not true, and it can also be utilized to make some exploit easier. So far there's no clue showing that the lazy-reset is important to any userfaultfd users because normally the unregister will only happen once for a specific range of memory of the lifecycle of the process. Considering all above, what this patch proposes is to do explicit pte resets when unregister an uffd region with wr-protect mode enabled. It should be the same as calling ioctl(UFFDIO_WRITEPROTECT, wp=false) right before ioctl(UFFDIO_UNREGISTER) for the user. So potentially it'll make the unregister slower. From that pov it's a very slight abi change, but hopefully nothing should break with this change either. Regarding to the change itself - core of uffd write [un]protect operation is moved into a separate function (uffd_wp_range()) and it is reused in the unregister code path. Note that the new function will not check for anything, e.g. ranges or memory types, because they should have been checked during the previous UFFDIO_REGISTER or it should have failed already. It also doesn't check mmap_changing because we're with mmap write lock held anyway. I added a Fixes upon introducing of uffd-wp shmem+hugetlbfs because that's the only issue reported so far and that's the commit David's reproducer will start working (v5.19+). But the whole idea actually applies to not only file memories but also anonymous. It's just that we don't need to fix anonymous prior to v5.19- because there's no known way to exploit. IOW, this patch can also fix the issue reported in [1] as the patch 2 does. [1] https://lore.kernel.org/all/20220811103435.188481-3-david@redhat.com/ Link: https://lkml.kernel.org/r/20220811201340.39342-1-peterx@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: Peter Xu Cc: David Hildenbrand Cc: Mike Rapoport Cc: Mike Kravetz Cc: Andrea Arcangeli Cc: Nadav Amit Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 4 ++++ include/linux/userfaultfd_k.h | 2 ++ mm/userfaultfd.c | 29 ++++++++++++++++++----------- 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1c44bf75f916..175de70e3adf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); } + /* Reset ptes for the whole vma range if wr-protected */ + if (userfaultfd_wp(vma)) + uffd_wp_range(mm, vma, start, vma_end - start, false); + new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 732b522bacb7..e1b8a915e9e9 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); +extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, + unsigned long start, unsigned long len, bool enable_wp); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 07d3befc80e4..7327b2573f7c 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, mmap_changing, 0); } +void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, + unsigned long start, unsigned long len, bool enable_wp) +{ + struct mmu_gather tlb; + pgprot_t newprot; + + if (enable_wp) + newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); + else + newprot = vm_get_page_prot(dst_vma->vm_flags); + + tlb_gather_mmu(&tlb, dst_mm); + change_protection(&tlb, dst_vma, start, start + len, newprot, + enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); + tlb_finish_mmu(&tlb); +} + int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing) { struct vm_area_struct *dst_vma; unsigned long page_mask; - struct mmu_gather tlb; - pgprot_t newprot; int err; /* @@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, goto out_unlock; } - if (enable_wp) - newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); - else - newprot = vm_get_page_prot(dst_vma->vm_flags); - - tlb_gather_mmu(&tlb, dst_mm); - change_protection(&tlb, dst_vma, start, start + len, newprot, - enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); - tlb_finish_mmu(&tlb); + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); err = 0; out_unlock: -- cgit v1.2.3 From cb241339b9d020c758a6647c69f8e42538c5cf88 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 10 Aug 2022 21:51:09 -0700 Subject: mm/shmem: fix chattr fsflags support in tmpfs ext[234] have always allowed unimplemented chattr flags to be set, but other filesystems have tended to be stricter. Follow the stricter approach for tmpfs: I don't want to have to explain why csu attributes don't actually work, and we won't need to update the chattr(1) manpage; and it's never wrong to start off strict, relaxing later if persuaded. Allow only a (append only) i (immutable) A (no atime) and d (no dump). Although lsattr showed 'A' inherited, the NOATIME behavior was not being inherited: because nothing sync'ed FS_NOATIME_FL to S_NOATIME. Add shmem_set_inode_flags() to sync the flags, using inode_set_flags() to avoid that instant of lost immutablility during fileattr_set(). But that change switched generic/079 from passing to failing: because FS_IMMUTABLE_FL and FS_APPEND_FL had been unconventionally included in the INHERITED fsflags: remove them and generic/079 is back to passing. Link: https://lkml.kernel.org/r/2961dcb0-ddf3-b9f0-3268-12a4ff996856@google.com Fixes: e408e695f5f1 ("mm/shmem: support FS_IOC_[SG]ETFLAGS in tmpfs") Signed-off-by: Hugh Dickins Cc: "Theodore Ts'o" Cc: Radoslaw Burny Cc: "Darrick J. Wong" Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 13 ++++-------- mm/shmem.c | 54 +++++++++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1b6c4013f691..ff0b990de83d 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -29,15 +29,10 @@ struct shmem_inode_info { struct inode vfs_inode; }; -#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE -#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE -#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE - -/* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) - -/* Flags that are appropriate for non-directories/regular files. */ -#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_MODIFIABLE \ + (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) +#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) struct shmem_sb_info { unsigned long max_blocks; /* How many blocks are allowed */ diff --git a/mm/shmem.c b/mm/shmem.c index 5783f11351bb..170b4078420f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2281,16 +2281,34 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -/* Mask out flags that are inappropriate for the given type of inode. */ -static unsigned shmem_mask_flags(umode_t mode, __u32 flags) +#ifdef CONFIG_TMPFS_XATTR +static int shmem_initxattrs(struct inode *, const struct xattr *, void *); + +/* + * chattr's fsflags are unrelated to extended attributes, + * but tmpfs has chosen to enable them under the same config option. + */ +static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) +{ + unsigned int i_flags = 0; + + if (fsflags & FS_NOATIME_FL) + i_flags |= S_NOATIME; + if (fsflags & FS_APPEND_FL) + i_flags |= S_APPEND; + if (fsflags & FS_IMMUTABLE_FL) + i_flags |= S_IMMUTABLE; + /* + * But FS_NODUMP_FL does not require any action in i_flags. + */ + inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE); +} +#else +static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags) { - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & SHMEM_REG_FLMASK; - else - return flags & SHMEM_OTHER_FLMASK; } +#define shmem_initxattrs NULL +#endif static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) @@ -2319,7 +2337,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir, info->i_crtime = inode->i_mtime; info->fsflags = (dir == NULL) ? 0 : SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED; - info->fsflags = shmem_mask_flags(mode, info->fsflags); + if (info->fsflags) + shmem_set_inode_flags(inode, info->fsflags); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); @@ -2468,12 +2487,6 @@ out_unacct_blocks: static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_short_symlink_operations; -#ifdef CONFIG_TMPFS_XATTR -static int shmem_initxattrs(struct inode *, const struct xattr *, void *); -#else -#define shmem_initxattrs NULL -#endif - static int shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, @@ -3179,18 +3192,13 @@ static int shmem_fileattr_set(struct user_namespace *mnt_userns, if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; + if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE) + return -EOPNOTSUPP; info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) | (fa->flags & SHMEM_FL_USER_MODIFIABLE); - inode->i_flags &= ~(S_APPEND | S_IMMUTABLE | S_NOATIME); - if (info->fsflags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; - if (info->fsflags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (info->fsflags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; - + shmem_set_inode_flags(inode, info->fsflags); inode->i_ctime = current_time(inode); return 0; } -- cgit v1.2.3 From d59b73a66e5e0682442b6d7b4965364e57078b80 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 3 Aug 2022 10:49:23 +0300 Subject: net/mlx5: Avoid false positive lockdep warning by adding lock_class_key Add a lock_class_key per mlx5 device to avoid a false positive "possible circular locking dependency" warning by lockdep, on flows which lock more than one mlx5 device, such as adding SF. kernel log: ====================================================== WARNING: possible circular locking dependency detected 5.19.0-rc8+ #2 Not tainted ------------------------------------------------------ kworker/u20:0/8 is trying to acquire lock: ffff88812dfe0d98 (&dev->intf_state_mutex){+.+.}-{3:3}, at: mlx5_init_one+0x2e/0x490 [mlx5_core] but task is already holding lock: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(¬ifier->n_head)->rwsem){++++}-{3:3}: down_write+0x90/0x150 blocking_notifier_chain_register+0x53/0xa0 mlx5_sf_table_init+0x369/0x4a0 [mlx5_core] mlx5_init_one+0x261/0x490 [mlx5_core] probe_one+0x430/0x680 [mlx5_core] local_pci_probe+0xd6/0x170 work_for_cpu_fn+0x4e/0xa0 process_one_work+0x7c2/0x1340 worker_thread+0x6f6/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 -> #0 (&dev->intf_state_mutex){+.+.}-{3:3}: __lock_acquire+0x2fc7/0x6720 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 bus_for_each_drv+0x123/0x1a0 __device_attach+0x1a3/0x460 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 __auxiliary_device_add+0x88/0xc0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); lock(&(¬ifier->n_head)->rwsem); lock(&dev->intf_state_mutex); *** DEADLOCK *** 4 locks held by kworker/u20:0/8: #0: ffff888150612938 ((wq_completion)mlx5_events){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888100cafdb8 ((work_completion)(&work->work)#3){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffff888101aa7898 (&(¬ifier->n_head)->rwsem){++++}-{3:3}, at: blocking_notifier_call_chain+0x5a/0x130 #3: ffff88813682d0e8 (&dev->mutex){....}-{3:3}, at:__device_attach+0x76/0x460 stack backtrace: CPU: 6 PID: 8 Comm: kworker/u20:0 Not tainted 5.19.0-rc8+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? lock_chain_count+0x20/0x20 ? register_lock_class+0x1880/0x1880 __lock_acquire+0x2fc7/0x6720 ? register_lock_class+0x1880/0x1880 ? register_lock_class+0x1880/0x1880 lock_acquire+0x1c1/0x550 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 __mutex_lock+0x12c/0x14b0 ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? mlx5_init_one+0x2e/0x490 [mlx5_core] ? _raw_read_unlock+0x1f/0x30 ? mutex_lock_io_nested+0x1320/0x1320 ? __ioremap_caller.constprop.0+0x306/0x490 ? mlx5_sf_dev_probe+0x269/0x370 [mlx5_core] ? iounmap+0x160/0x160 mlx5_init_one+0x2e/0x490 [mlx5_core] mlx5_sf_dev_probe+0x29c/0x370 [mlx5_core] ? mlx5_sf_dev_remove+0x130/0x130 [mlx5_core] auxiliary_bus_probe+0x9d/0xe0 really_probe+0x1e0/0xaa0 __driver_probe_device+0x219/0x480 ? auxiliary_match_id+0xe9/0x140 driver_probe_device+0x49/0x130 __device_attach_driver+0x1b8/0x280 ? driver_allows_async_probing+0x140/0x140 bus_for_each_drv+0x123/0x1a0 ? bus_for_each_dev+0x1a0/0x1a0 ? lockdep_hardirqs_on_prepare+0x286/0x400 ? trace_hardirqs_on+0x2d/0x100 __device_attach+0x1a3/0x460 ? device_driver_attach+0x1e0/0x1e0 ? kobject_uevent_env+0x22d/0xf10 bus_probe_device+0x1a2/0x260 device_add+0x9b1/0x1b40 ? dev_set_name+0xab/0xe0 ? __fw_devlink_link_to_suppliers+0x260/0x260 ? memset+0x20/0x40 ? lockdep_init_map_type+0x21a/0x7d0 __auxiliary_device_add+0x88/0xc0 ? auxiliary_device_init+0x86/0xa0 mlx5_sf_dev_state_change_handler+0x67e/0x9d0 [mlx5_core] blocking_notifier_call_chain+0xd5/0x130 mlx5_vhca_state_work_handler+0x2b0/0x3f0 [mlx5_core] ? mlx5_vhca_event_arm+0x100/0x100 [mlx5_core] ? lock_downgrade+0x6e0/0x6e0 ? lockdep_hardirqs_on_prepare+0x286/0x400 process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bec8d6d0b5f6..c085b031abfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1530,7 +1530,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); + lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); + lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1597,6 +1599,7 @@ err_timeout_init: mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); return err; } @@ -1618,6 +1621,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); } static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..7b7ce602c808 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -779,6 +779,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; + struct lock_class_key lock_key; unsigned long intf_state; struct mlx5_priv priv; struct mlx5_profile profile; -- cgit v1.2.3 From f2e44dffa97f2e1c222a959ea5b6e604548b891b Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Fri, 19 Aug 2022 11:15:14 -0400 Subject: bonding: 3ad: make ad_ticks_per_sec a const The value is only ever set once in bond_3ad_initialize and only ever read otherwise. There seems to be no reason to set the variable via bond_3ad_initialize when setting the global variable will do. Change ad_ticks_per_sec to a const to enforce its read-only usage. Signed-off-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 11 +++-------- drivers/net/bonding/bond_main.c | 2 +- include/net/bond_3ad.h | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 1f0120cbe9e8..184608bd8999 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -84,7 +84,8 @@ enum ad_link_speed_type { static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { 0, 0, 0, 0, 0, 0 }; -static u16 ad_ticks_per_sec; + +static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = @@ -2001,11 +2002,10 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) /** * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures * @bond: bonding struct to work on - * @tick_resolution: tick duration (millisecond resolution) * * Can be called only after the mac address of the bond is set. */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) +void bond_3ad_initialize(struct bonding *bond) { BOND_AD_INFO(bond).aggregator_identifier = 0; BOND_AD_INFO(bond).system.sys_priority = @@ -2017,11 +2017,6 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->params.ad_actor_system); - /* initialize how many times this module is called in one - * second (should be about every 100ms) - */ - ad_ticks_per_sec = tick_resolution; - bond_3ad_initiate_agg_selection(bond, AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 50e60843020c..2f4da2c13c0a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2081,7 +2081,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* Initialize AD with the number of times that the AD timer is called in 1 second * can be called only after the mac address of the bond is set */ - bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL); + bond_3ad_initialize(bond); } else { SLAVE_AD_INFO(new_slave)->id = SLAVE_AD_INFO(prev_slave)->id + 1; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 184105d68294..be2992e6de5d 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -290,7 +290,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state) } /* ========== AD Exported functions to the main bonding code ========== */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution); +void bond_3ad_initialize(struct bonding *bond); void bond_3ad_bind_slave(struct slave *slave); void bond_3ad_unbind_slave(struct slave *slave); void bond_3ad_state_machine_handler(struct work_struct *); -- cgit v1.2.3 From 13a8e0f6b01b14b2e28ba144e112c883f03a3db2 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 19 Aug 2022 15:16:11 -0700 Subject: Revert "driver core: Delete driver_deferred_probe_check_state()" This reverts commit 9cbffc7a59561be950ecc675d19a3d2b45202b2b. There are a few more issues to fix that have been reported in the thread for the original series [1]. We'll need to fix those before this will work. So, revert it for now. [1] - https://lore.kernel.org/lkml/20220601070707.3946847-1-saravanak@google.com/ Fixes: 9cbffc7a5956 ("driver core: Delete driver_deferred_probe_check_state()") Tested-by: Tony Lindgren Tested-by: Peng Fan Tested-by: Douglas Anderson Tested-by: Alexander Stein Reviewed-by: Tony Lindgren Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20220819221616.2107893-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 30 ++++++++++++++++++++++++++++++ include/linux/device/driver.h | 1 + 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 70f79fc71539..a8916d1bfdcb 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -274,12 +274,42 @@ static int __init deferred_probe_timeout_setup(char *str) } __setup("deferred_probe_timeout=", deferred_probe_timeout_setup); +/** + * driver_deferred_probe_check_state() - Check deferred probe state + * @dev: device to check + * + * Return: + * * -ENODEV if initcalls have completed and modules are disabled. + * * -ETIMEDOUT if the deferred probe timeout was set and has expired + * and modules are enabled. + * * -EPROBE_DEFER in other cases. + * + * Drivers or subsystems can opt-in to calling this function instead of directly + * returning -EPROBE_DEFER. + */ +int driver_deferred_probe_check_state(struct device *dev) +{ + if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done) { + dev_warn(dev, "ignoring dependency for device, assuming no driver\n"); + return -ENODEV; + } + + if (!driver_deferred_probe_timeout && initcalls_done) { + dev_warn(dev, "deferred probe timeout, ignoring dependency\n"); + return -ETIMEDOUT; + } + + return -EPROBE_DEFER; +} +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); + static void deferred_probe_timeout_work_func(struct work_struct *work) { struct device_private *p; fw_devlink_drivers_done(); + driver_deferred_probe_timeout = 0; driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 7acaabde5396..2114d65b862f 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -242,6 +242,7 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); +int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); /** -- cgit v1.2.3 From e1d0c6d05afdcff01ace698edb3b8808db1dc066 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:49 +0700 Subject: io_uring: uapi: Add `extern "C"` in io_uring.h for liburing Make it easy for liburing to integrate uapi header with the kernel. Previously, when this header changes, the liburing side can't directly copy this header file due to some small differences. Sync them. Link: https://lore.kernel.org/io-uring/f1feef16-6ea2-0653-238f-4aaee35060b6@kernel.dk Cc: Bart Van Assche Cc: Dylan Yudaken Cc: Facebook Kernel Team Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1463cfecb56b..9e0b5c8d92ce 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -12,6 +12,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* * IO submission data structure (Submission Queue Entry) */ @@ -661,4 +665,8 @@ struct io_uring_recvmsg_out { __u32 flags; }; +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From 7997eff82828304b780dc0a39707e1946d6f1ebf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Aug 2022 17:38:37 +0200 Subject: netfilter: ebtables: reject blobs that don't provide all entry points Harshit Mogalapalli says: In ebt_do_table() function dereferencing 'private->hook_entry[hook]' can lead to NULL pointer dereference. [..] Kernel panic: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] [..] RIP: 0010:ebt_do_table+0x1dc/0x1ce0 Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 5c 16 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 6c df 08 48 8d 7d 2c 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 88 [..] Call Trace: nf_hook_slow+0xb1/0x170 __br_forward+0x289/0x730 maybe_deliver+0x24b/0x380 br_flood+0xc6/0x390 br_dev_xmit+0xa2e/0x12c0 For some reason ebtables rejects blobs that provide entry points that are not supported by the table, but what it should instead reject is the opposite: blobs that DO NOT provide an entry point supported by the table. t->valid_hooks is the bitmask of hooks (input, forward ...) that will see packets. Providing an entry point that is not support is harmless (never called/used), but the inverse isn't: it results in a crash because the ebtables traverser doesn't expect a NULL blob for a location its receiving packets for. Instead of fixing all the individual checks, do what iptables is doing and reject all blobs that differ from the expected hooks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Harshit Mogalapalli Reported-by: syzkaller Signed-off-by: Florian Westphal --- include/linux/netfilter_bridge/ebtables.h | 4 ---- net/bridge/netfilter/ebtable_broute.c | 8 -------- net/bridge/netfilter/ebtable_filter.c | 8 -------- net/bridge/netfilter/ebtable_nat.c | 8 -------- net/bridge/netfilter/ebtables.c | 8 +------- 5 files changed, 1 insertion(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a13296d6c7ce..fd533552a062 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -94,10 +94,6 @@ struct ebt_table { struct ebt_replace_kernel *table; unsigned int valid_hooks; rwlock_t lock; - /* e.g. could be the table explicitly only allows certain - * matches, targets, ... 0 == let it in */ - int (*check)(const struct ebt_table_info *info, - unsigned int valid_hooks); /* the data used by the kernel */ struct ebt_table_info *private; struct nf_hook_ops *ops; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1a11064f9990..8f19253024b0 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)&initial_chain, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~(1 << NF_BR_BROUTING)) - return -EINVAL; - return 0; -} - static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index cb949436bc0e..278f324e6752 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~FILTER_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 5ee0531ae506..9066f7f376d5 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, }; -static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~NAT_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f2dbefb61ce8..9a0ae59cdc50 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; } - /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) + if (repl->valid_hooks != t->valid_hooks) goto free_unlock; if (repl->num_counters && repl->num_counters != t->private->nentries) { @@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (ret != 0) goto free_chainstack; - if (table->check && table->check(newinfo, table->valid_hooks)) { - ret = -EINVAL; - goto free_chainstack; - } - table->private = newinfo; rwlock_init(&table->lock); mutex_lock(&ebt_mutex); -- cgit v1.2.3 From ab482c6b66a4a8c0a8c0b0f577a785cf9ff1c2e2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2022 10:52:48 +0200 Subject: netfilter: nf_tables: make table handle allocation per-netns friendly mutex is per-netns, move table_netns to the pernet area. *read-write* to 0xffffffff883a01e8 of 8 bytes by task 6542 on cpu 0: nf_tables_newtable+0x6dc/0xc00 net/netfilter/nf_tables_api.c:1221 nfnetlink_rcv_batch net/netfilter/nfnetlink.c:513 [inline] nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0xa6a/0x13a0 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x652/0x730 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x643/0x740 net/netlink/af_netlink.c:1921 Fixes: f102d66b335a ("netfilter: nf_tables: use dedicated mutex to guard transactions") Reported-by: Abhishek Shah Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 99aae36c04b9..cdb7db9b0e25 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1652,6 +1652,7 @@ struct nftables_pernet { struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; + u64 table_handle; unsigned int base_seq; u8 validate_state; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dff2b5851bbb..0951f31caabe 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); static LIST_HEAD(nf_tables_destroy_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); -static u64 table_handle; enum { NFT_VALIDATE_SKIP = 0, @@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->flowtables); table->family = family; table->flags = flags; - table->handle = ++table_handle; + table->handle = ++nft_net->table_handle; if (table->flags & NFT_TABLE_F_OWNER) table->nlpid = NETLINK_CB(skb).portid; -- cgit v1.2.3 From 759eebbcfafcefa23b59e912396306543764bd3c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 Aug 2022 23:13:00 +0200 Subject: netfilter: flowtable: add function to invoke garbage collection immediately Expose nf_flow_table_gc_run() to force a garbage collector run from the offload infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_core.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d5326c44b453..476cc4423a90 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); +void nf_flow_table_gc_run(struct nf_flowtable *flow_table); void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, struct net_device *dev); void nf_flow_table_cleanup(struct net_device *dev); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 765ac779bfc8..60fc1e1b7182 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -437,12 +437,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, } } +void nf_flow_table_gc_run(struct nf_flowtable *flow_table) +{ + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); +} + static void nf_flow_offload_work_gc(struct work_struct *work) { struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } @@ -601,10 +606,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); nf_flow_table_offload_flush(flow_table); if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); + nf_flow_table_gc_run(flow_table); + rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); -- cgit v1.2.3 From 9afb4b27349a499483ae0134282cefd0c90f480f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Nov 2021 22:24:15 +0100 Subject: netfilter: flowtable: fix stuck flows on cleanup due to pending work To clear the flow table on flow table free, the following sequence normally happens in order: 1) gc_step work is stopped to disable any further stats/del requests. 2) All flow table entries are set to teardown state. 3) Run gc_step which will queue HW del work for each flow table entry. 4) Waiting for the above del work to finish (flush). 5) Run gc_step again, deleting all entries from the flow table. 6) Flow table is freed. But if a flow table entry already has pending HW stats or HW add work step 3 will not queue HW del work (it will be skipped), step 4 will wait for the pending add/stats to finish, and step 5 will queue HW del work which might execute after freeing of the flow table. To fix the above, this patch flushes the pending work, then it sets the teardown flag to all flows in the flowtable and it forces a garbage collector run to queue work to remove the flows from hardware, then it flushes this new pending work and (finally) it forces another garbage collector run to remove the entry from the software flowtable. Stack trace: [47773.882335] BUG: KASAN: use-after-free in down_read+0x99/0x460 [47773.883634] Write of size 8 at addr ffff888103b45aa8 by task kworker/u20:6/543704 [47773.885634] CPU: 3 PID: 543704 Comm: kworker/u20:6 Not tainted 5.12.0-rc7+ #2 [47773.886745] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) [47773.888438] Workqueue: nf_ft_offload_del flow_offload_work_handler [nf_flow_table] [47773.889727] Call Trace: [47773.890214] dump_stack+0xbb/0x107 [47773.890818] print_address_description.constprop.0+0x18/0x140 [47773.892990] kasan_report.cold+0x7c/0xd8 [47773.894459] kasan_check_range+0x145/0x1a0 [47773.895174] down_read+0x99/0x460 [47773.899706] nf_flow_offload_tuple+0x24f/0x3c0 [nf_flow_table] [47773.907137] flow_offload_work_handler+0x72d/0xbe0 [nf_flow_table] [47773.913372] process_one_work+0x8ac/0x14e0 [47773.921325] [47773.921325] Allocated by task 592159: [47773.922031] kasan_save_stack+0x1b/0x40 [47773.922730] __kasan_kmalloc+0x7a/0x90 [47773.923411] tcf_ct_flow_table_get+0x3cb/0x1230 [act_ct] [47773.924363] tcf_ct_init+0x71c/0x1156 [act_ct] [47773.925207] tcf_action_init_1+0x45b/0x700 [47773.925987] tcf_action_init+0x453/0x6b0 [47773.926692] tcf_exts_validate+0x3d0/0x600 [47773.927419] fl_change+0x757/0x4a51 [cls_flower] [47773.928227] tc_new_tfilter+0x89a/0x2070 [47773.936652] [47773.936652] Freed by task 543704: [47773.937303] kasan_save_stack+0x1b/0x40 [47773.938039] kasan_set_track+0x1c/0x30 [47773.938731] kasan_set_free_info+0x20/0x30 [47773.939467] __kasan_slab_free+0xe7/0x120 [47773.940194] slab_free_freelist_hook+0x86/0x190 [47773.941038] kfree+0xce/0x3a0 [47773.941644] tcf_ct_flow_table_cleanup_work Original patch description and stack trace by Paul Blakey. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Paul Blakey Tested-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 ++ net/netfilter/nf_flow_table_core.c | 7 +++---- net/netfilter/nf_flow_table_offload.c | 8 ++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 476cc4423a90..cd982f4a0f50 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -307,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, struct flow_offload *flow); void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 60fc1e1b7182..81c26a96c30b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -605,12 +605,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); + nf_flow_table_offload_flush(flow_table); + /* ... no more pending work after this stage ... */ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_gc_run(flow_table); - nf_flow_table_offload_flush(flow_table); - if (nf_flowtable_hw_offload(flow_table)) - nf_flow_table_gc_run(flow_table); - + nf_flow_table_offload_flush_cleanup(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 103b6cbf257f..b04645ced89b 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1074,6 +1074,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, flow_offload_queue_work(offload); } +void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable) +{ + if (nf_flowtable_hw_offload(flowtable)) { + flush_workqueue(nf_flow_offload_del_wq); + nf_flow_table_gc_run(flowtable); + } +} + void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) { -- cgit v1.2.3 From c42b7cddea47503411bfb5f2f93a4154aaffa2d9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:51 -0700 Subject: net: Fix a data-race around sysctl_net_busy_poll. While reading sysctl_net_busy_poll, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 060212928670 ("net: add low latency socket poll") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c4898fcbf923..f90f0021f5f2 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; static inline bool net_busy_loop_on(void) { - return sysctl_net_busy_poll; + return READ_ONCE(sysctl_net_busy_poll); } static inline bool sk_can_busy_loop(const struct sock *sk) -- cgit v1.2.3 From af67508ea6cbf0e4ea27f8120056fa2efce127dd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:56 -0700 Subject: net: Fix data-races around sysctl_fb_tunnels_only_for_init_net. While reading sysctl_fb_tunnels_only_for_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1a3cb93c3dcc..6d3a33fd0cdb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -640,9 +640,14 @@ extern int sysctl_devconf_inherit_init_net; */ static inline bool net_has_fallback_tunnels(const struct net *net) { - return !IS_ENABLED(CONFIG_SYSCTL) || - !sysctl_fb_tunnels_only_for_init_net || - (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); +#if IS_ENABLED(CONFIG_SYSCTL) + int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); + + return !fb_tunnels_only_for_init_net || + (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); +#else + return true; +#endif } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -- cgit v1.2.3 From a5612ca10d1aa05624ebe72633e0c8c792970833 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:57 -0700 Subject: net: Fix data-races around sysctl_devconf_inherit_init_net. While reading sysctl_devconf_inherit_init_net, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 856c395cfa63 ("net: introduce a knob to control whether to inherit devconf config") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ net/ipv4/devinet.c | 16 ++++++++++------ net/ipv6/addrconf.c | 5 ++--- 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6d3a33fd0cdb..05d6f3facd5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -650,6 +650,15 @@ static inline bool net_has_fallback_tunnels(const struct net *net) #endif } +static inline int net_inherit_devconf(void) +{ +#if IS_ENABLED(CONFIG_SYSCTL) + return READ_ONCE(sysctl_devconf_inherit_init_net); +#else + return 0; +#endif +} + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 92b778e423df..e8b9a9202fec 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net) #endif if (!net_eq(net, &init_net)) { - if (IS_ENABLED(CONFIG_SYSCTL) && - sysctl_devconf_inherit_init_net == 3) { + switch (net_inherit_devconf()) { + case 3: /* copy from the current netns */ memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, current->nsproxy->net_ns->ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); - } else if (!IS_ENABLED(CONFIG_SYSCTL) || - sysctl_devconf_inherit_init_net != 2) { - /* inherit == 0 or 1: copy from init_net */ + break; + case 0: + case 1: + /* copy from init_net */ memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); + break; + case 2: + /* use compiled values */ + break; } - /* else inherit == 2: use compiled values */ } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b624e3d8c5f0..e15f64f22fa8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7162,9 +7162,8 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; - if (IS_ENABLED(CONFIG_SYSCTL) && - !net_eq(net, &init_net)) { - switch (sysctl_devconf_inherit_init_net) { + if (!net_eq(net, &init_net)) { + switch (net_inherit_devconf()) { case 1: /* copy from init_net */ memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); -- cgit v1.2.3 From 8db24af3f02ebdbf302196006ebb270c4c3a2706 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 10:46:58 -0700 Subject: net: Fix a data-race around gro_normal_batch. While reading gro_normal_batch, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs") Signed-off-by: Kuniyuki Iwashima Acked-by: Edward Cree Signed-off-by: David S. Miller --- include/net/gro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 867656b0739c..24003dea8fa4 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, { list_add_tail(&skb->list, &napi->rx_list); napi->rx_count += segs; - if (napi->rx_count >= gro_normal_batch) + if (napi->rx_count >= READ_ONCE(gro_normal_batch)) gro_normal_list(napi); } -- cgit v1.2.3 From 2a5840124009f133bd09fd855963551fb2cefe22 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Jul 2022 12:16:22 -0700 Subject: lsm,io_uring: add LSM hooks for the new uring_cmd file op io-uring cmd support was added through ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd"), this extended the struct file_operations to allow a new command which each subsystem can use to enable command passthrough. Add an LSM specific for the command passthrough which enables LSMs to inspect the command details. This was discussed long ago without no clear pointer for something conclusive, so this enables LSMs to at least reject this new file operation. [0] https://lkml.kernel.org/r/8adf55db-7bab-f59d-d612-ed906b948d19@schaufler-ca.com Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Luis Chamberlain Acked-by: Jens Axboe Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 3 +++ include/linux/security.h | 5 +++++ io_uring/uring_cmd.c | 5 +++++ security/security.c | 4 ++++ 5 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 806448173033..60fff133c0b1 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -407,4 +407,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #ifdef CONFIG_IO_URING LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) +LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84a0d7e02176..3aa6030302f5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1582,6 +1582,9 @@ * Check whether the current task is allowed to spawn a io_uring polling * thread (IORING_SETUP_SQPOLL). * + * @uring_cmd: + * Check whether the file_operations uring_cmd is allowed to run. + * */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); diff --git a/include/linux/security.h b/include/linux/security.h index 1bc362cb413f..7bd0c490703d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2060,6 +2060,7 @@ static inline int security_perf_event_write(struct perf_event *event) #ifdef CONFIG_SECURITY extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); +extern int security_uring_cmd(struct io_uring_cmd *ioucmd); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2069,6 +2070,10 @@ static inline int security_uring_sqpoll(void) { return 0; } +static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8e0cc2d9205e..0f7ad956ddcb 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -88,6 +89,10 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (!req->file->f_op->uring_cmd) return -EOPNOTSUPP; + ret = security_uring_cmd(ioucmd); + if (ret) + return ret; + if (ctx->flags & IORING_SETUP_SQE128) issue_flags |= IO_URING_F_SQE128; if (ctx->flags & IORING_SETUP_CQE32) diff --git a/security/security.c b/security/security.c index 14d30fec8a00..4b95de24bc8d 100644 --- a/security/security.c +++ b/security/security.c @@ -2660,4 +2660,8 @@ int security_uring_sqpoll(void) { return call_int_hook(uring_sqpoll, 0); } +int security_uring_cmd(struct io_uring_cmd *ioucmd) +{ + return call_int_hook(uring_cmd, 0, ioucmd); +} #endif /* CONFIG_IO_URING */ -- cgit v1.2.3 From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 09:17:08 -0400 Subject: wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- Documentation/atomic_bitops.txt | 10 ++++------ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++++++ .../asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++++ include/asm-generic/bitops/non-atomic.h | 1 + .../bitops/non-instrumented-non-atomic.h | 1 + include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 10 files changed, 60 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index d8b101c97031..edea4656c5c0 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_set_bit_lock(), - if the bit in memory is unchanged by the operation then it is deemed to have - failed. + - RMW operations that are conditional are fully ordered. -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and -clear_bit_unlock() which has RELEASE semantics. +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has +ACQUIRE semantics. Since a platform only has a single means of achieving atomic operations the same barriers as for atomic_t are used, see atomic_t.txt. diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 973c6bd17f98..0fe9de58af31 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr) variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 3d5ebd24652b..564a8c675d85 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include +#include #ifndef _LINUX_BITOPS_H #error only can be included directly @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time. @@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit +#define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index 988a3bbfba34..2b238b161a62 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volatile unsigned long *addr) return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h index 5c37ced343ae..71f8d54a5195 100644 --- a/include/asm-generic/bitops/non-atomic.h +++ b/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h index bdb9b1ffaee9..0ddc78dfc358 100644 --- a/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -12,5 +12,6 @@ #define ___test_and_change_bit arch___test_and_change_bit #define _test_bit arch_test_bit +#define _test_bit_acquire arch_test_bit_acquire #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf9bf65039f2..3b89c64bcfd8 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index def8b8d30ccc..089c9ade4325 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7dec36aecbd9..7725b7579b78 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index d4788f810b55..0b1cd985dc27 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); -- cgit v1.2.3 From fcab34b433e2c13e333b2f53c4a8409eadc432c7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 10 Aug 2022 10:53:59 -0600 Subject: mm: re-allow pinning of zero pfns (again) The below referenced commit makes the same error as 1c563432588d ("mm: fix is_pinnable_page against a cma page"), re-interpreting the logic to exclude pinning of the zero page, which breaks device assignment with vfio. To avoid further subtle mistakes, split the logic into discrete tests. [akpm@linux-foundation.org: simplify comment, per John] Link: https://lkml.kernel.org/r/166015037385.760108.16881097713975517242.stgit@omen Link: https://lore.kernel.org/all/165490039431.944052.12458624139225785964.stgit@omen Fixes: f25cbb7a95a2 ("mm: add zone device coherent type memory support") Signed-off-by: Alex Williamson Suggested-by: Matthew Wilcox Suggested-by: Felix Kuehling Tested-by: Slawomir Laba Reviewed-by: John Hubbard Cc: Alex Sierra Cc: Christoph Hellwig Cc: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 982f2607180b..21f8b27bd9fd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1544,9 +1544,16 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - return !(is_device_coherent_page(page) || - is_zone_movable_page(page) || - is_zero_pfn(page_to_pfn(page))); + /* The zero page may always be pinned */ + if (is_zero_pfn(page_to_pfn(page))) + return true; + + /* Coherent device memory must always allow eviction. */ + if (is_device_coherent_page(page)) + return false; + + /* Otherwise, non-movable zone pages can be pinned. */ + return !is_zone_movable_page(page); } #else static inline bool is_longterm_pinnable_page(struct page *page) -- cgit v1.2.3 From dbb16df6443c59e8a1ef21c2272fcf387d600ddf Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 17 Aug 2022 17:21:39 +0000 Subject: Revert "memcg: cleanup racy sum avoidance code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 96e51ccf1af33e82f429a0d6baebba29c6448d0f. Recently we started running the kernel with rstat infrastructure on production traffic and begin to see negative memcg stats values. Particularly the 'sock' stat is the one which we observed having negative value. $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 18446744073708724224 Re-run after couple of seconds $ grep "sock " /mnt/memory/job/memory.stat sock 253952 total_sock 53248 For now we are only seeing this issue on large machines (256 CPUs) and only with 'sock' stat. I think the networking stack increase the stat on one cpu and decrease it on another cpu much more often. So, this negative sock is due to rstat flusher flushing the stats on the CPU that has seen the decrement of sock but missed the CPU that has increments. A typical race condition. For easy stable backport, revert is the most simple solution. For long term solution, I am thinking of two directions. First is just reduce the race window by optimizing the rstat flusher. Second is if the reader sees a negative stat value, force flush and restart the stat collection. Basically retry but limited. Link: https://lkml.kernel.org/r/20220817172139.3141101-1-shakeelb@google.com Fixes: 96e51ccf1af33e8 ("memcg: cleanup racy sum avoidance code") Signed-off-by: Shakeel Butt Cc: "Michal Koutný" Cc: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: David Hildenbrand Cc: Yosry Ahmed Cc: Greg Thelen Cc: [5.15] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4d31ce55b1c0..6257867fbf95 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -987,19 +987,30 @@ static inline void mod_memcg_page_state(struct page *page, static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { - return READ_ONCE(memcg->vmstats.state[idx]); + long x = READ_ONCE(memcg->vmstats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; + long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return READ_ONCE(pn->lruvec_stats.state[idx]); + x = READ_ONCE(pn->lruvec_stats.state[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, -- cgit v1.2.3 From 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Fri, 19 Aug 2022 16:11:45 +0800 Subject: asm-generic: sections: refactor memory_intersects There are two problems with the current code of memory_intersects: First, it doesn't check whether the region (begin, end) falls inside the region (virt, vend), that is (virt < begin && vend > end). The second problem is if vend is equal to begin, it will return true but this is wrong since vend (virt + size) is not the last address of the memory region but (virt + size -1) is. The wrong determination will trigger the misreporting when the function check_for_illegal_area calls memory_intersects to check if the dma region intersects with stext region. The misreporting is as below (stext is at 0x80100000): WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] Modules linked in: CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 Hardware name: Xilinx Zynq Platform unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from __warn+0xb0/0x198 __warn from warn_slowpath_fmt+0x80/0xb4 warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 check_for_illegal_area from debug_dma_map_sg+0x94/0x368 debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 usb_stor_control_thread from kthread+0xf8/0x104 kthread from ret_from_fork+0x14/0x2c Refactor memory_intersects to fix the two problems above. Before the 1d7db834a027e ("dma-debug: use memory_intersects() directly"), memory_intersects is called only by printk_late_init: printk_late_init -> init_section_intersects ->memory_intersects. There were few places where memory_intersects was called. When commit 1d7db834a027e ("dma-debug: use memory_intersects() directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA subsystem uses it to check for an illegal area and the calltrace above is triggered. [akpm@linux-foundation.org: fix nearby comment typo] Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.com Fixes: 979559362516 ("asm/sections: add helpers to check for section data") Signed-off-by: Quanyang Wang Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Thierry Reding Cc: Signed-off-by: Andrew Morton --- include/asm-generic/sections.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d0f7bdd2fdf2..db13bb620f52 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region - * @begin: virtual address of the beginning of the memory regien + * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object @@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, { void *vend = virt + size; - return (virt >= begin && virt < end) || (vend >= begin && vend < end); + if (virt < end && vend > begin) + return true; + + return false; } /** -- cgit v1.2.3 From dcf8e5633e2e69ad60b730ab5905608b756a032f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Aug 2022 12:59:25 -0700 Subject: tracing: Define the is_signed_type() macro once There are two definitions of the is_signed_type() macro: one in and a second definition in . As suggested by Linus, move the definition of the is_signed_type() macro into the header file. Change the definition of the is_signed_type() macro to make sure that it does not trigger any sparse warnings with future versions of sparse for bitwise types. Link: https://lore.kernel.org/all/CAHk-=whjH6p+qzwUdx5SOVVHjS3WvzJQr6mDUwhEyTf6pJWzaQ@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjQGnVfb4jehFR0XyZikdQvCZouE96xR_nnf5kqaM5qqQ@mail.gmail.com/ Cc: Rasmus Villemoes Cc: Steven Rostedt Acked-by: Kees Cook Signed-off-by: Bart Van Assche Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ include/linux/overflow.h | 1 - include/linux/trace_events.h | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 01ce94b58b42..7713d7bcdaea 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,6 +239,12 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * Whether 'type' is a signed type or an unsigned type. Supports scalar types, + * bool and also pointer types. + */ +#define is_signed_type(type) (((type)(-1)) < (__force type)1) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1221d11f8e5..0eb3b192f07a 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -30,7 +30,6 @@ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ -#define is_signed_type(type) (((type)(-1)) < (type)1) #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index b18759a673c6..8401dec93c15 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -814,8 +814,6 @@ extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < (type)1) - int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, -- cgit v1.2.3 From 9c6d778800b921bde3bff3cff5003d1650f942d1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 26 Aug 2022 15:31:32 -0400 Subject: USB: core: Prevent nested device-reset calls Automatic kernel fuzzing revealed a recursive locking violation in usb-storage: ============================================ WARNING: possible recursive locking detected 5.18.0 #3 Not tainted -------------------------------------------- kworker/1:3/1205 is trying to acquire lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 but task is already holding lock: ffff888018638db8 (&us_interface_key[i]){+.+.}-{3:3}, at: usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 ... stack backtrace: CPU: 1 PID: 1205 Comm: kworker/1:3 Not tainted 5.18.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2988 [inline] check_deadlock kernel/locking/lockdep.c:3031 [inline] validate_chain kernel/locking/lockdep.c:3816 [inline] __lock_acquire.cold+0x152/0x3ca kernel/locking/lockdep.c:5053 lock_acquire kernel/locking/lockdep.c:5665 [inline] lock_acquire+0x1ab/0x520 kernel/locking/lockdep.c:5630 __mutex_lock_common kernel/locking/mutex.c:603 [inline] __mutex_lock+0x14f/0x1610 kernel/locking/mutex.c:747 usb_stor_pre_reset+0x35/0x40 drivers/usb/storage/usb.c:230 usb_reset_device+0x37d/0x9a0 drivers/usb/core/hub.c:6109 r871xu_dev_remove+0x21a/0x270 drivers/staging/rtl8712/usb_intf.c:622 usb_unbind_interface+0x1bd/0x890 drivers/usb/core/driver.c:458 device_remove drivers/base/dd.c:545 [inline] device_remove+0x11f/0x170 drivers/base/dd.c:537 __device_release_driver drivers/base/dd.c:1222 [inline] device_release_driver_internal+0x1a7/0x2f0 drivers/base/dd.c:1248 usb_driver_release_interface+0x102/0x180 drivers/usb/core/driver.c:627 usb_forced_unbind_intf+0x4d/0xa0 drivers/usb/core/driver.c:1118 usb_reset_device+0x39b/0x9a0 drivers/usb/core/hub.c:6114 This turned out not to be an error in usb-storage but rather a nested device reset attempt. That is, as the rtl8712 driver was being unbound from a composite device in preparation for an unrelated USB reset (that driver does not have pre_reset or post_reset callbacks), its ->remove routine called usb_reset_device() -- thus nesting one reset call within another. Performing a reset as part of disconnect processing is a questionable practice at best. However, the bug report points out that the USB core does not have any protection against nested resets. Adding a reset_in_progress flag and testing it will prevent such errors in the future. Link: https://lore.kernel.org/all/CAB7eexKUpvX-JNiLzhXBDWgfg2T9e9_0Tw4HQ6keN==voRbP0g@mail.gmail.com/ Cc: stable@vger.kernel.org Reported-and-tested-by: Rondreis Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/YwkflDxvg0KWqyZK@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 ++++++++++ include/linux/usb.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 2633acde7ac1..d4b1e70d1498 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6038,6 +6038,11 @@ re_enumerate: * the reset is over (using their post_reset method). * * Return: The same as for usb_reset_and_verify_device(). + * However, if a reset is already in progress (for instance, if a + * driver doesn't have pre_ or post_reset() callbacks, and while + * being unbound or re-bound during the ongoing reset its disconnect() + * or probe() routine tries to perform a second, nested reset), the + * routine returns -EINPROGRESS. * * Note: * The caller must own the device lock. For example, it's safe to use @@ -6071,6 +6076,10 @@ int usb_reset_device(struct usb_device *udev) return -EISDIR; } + if (udev->reset_in_progress) + return -EINPROGRESS; + udev->reset_in_progress = 1; + port_dev = hub->ports[udev->portnum - 1]; /* @@ -6135,6 +6144,7 @@ int usb_reset_device(struct usb_device *udev) usb_autosuspend_device(udev); memalloc_noio_restore(noio_flag); + udev->reset_in_progress = 0; return ret; } EXPORT_SYMBOL_GPL(usb_reset_device); diff --git a/include/linux/usb.h b/include/linux/usb.h index f7a9914fc97f..9ff1ad4dfad1 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -575,6 +575,7 @@ struct usb3_lpm_parameters { * @devaddr: device address, XHCI: assigned by HW, others: same as devnum * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device + * @reset_in_progress: the device is being reset * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; * (user space) policy determines if we authorize this device to be @@ -662,6 +663,7 @@ struct usb_device { unsigned can_submit:1; unsigned persist_enabled:1; + unsigned reset_in_progress:1; unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; -- cgit v1.2.3 From fce1c23f629173e0db78b79a74f2052044a00e65 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 23 Aug 2022 10:39:47 +0300 Subject: net: virtio_net: fix notification coalescing comments Fix wording in comments for the notifications coalescing feature. Signed-off-by: Alvaro Karsz Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220823073947.14774-1-alvaro.karsz@solid-run.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 29ced55514d4..6cb842ea8979 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ +#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -364,24 +364,24 @@ struct virtio_net_hash_config { */ #define VIRTIO_NET_CTRL_NOTF_COAL 6 /* - * Set the tx-usecs/tx-max-packets patameters. - * tx-usecs - Maximum number of usecs to delay a TX notification. - * tx-max-packets - Maximum number of packets to send before a TX notification. + * Set the tx-usecs/tx-max-packets parameters. */ struct virtio_net_ctrl_coal_tx { + /* Maximum number of packets to send before a TX notification */ __le32 tx_max_packets; + /* Maximum number of usecs to delay a TX notification */ __le32 tx_usecs; }; #define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 /* - * Set the rx-usecs/rx-max-packets patameters. - * rx-usecs - Maximum number of usecs to delay a RX notification. - * rx-max-frames - Maximum number of packets to receive before a RX notification. + * Set the rx-usecs/rx-max-packets parameters. */ struct virtio_net_ctrl_coal_rx { + /* Maximum number of packets to receive before a RX notification */ __le32 rx_max_packets; + /* Maximum number of usecs to delay a RX notification */ __le32 rx_usecs; }; -- cgit v1.2.3 From ec1bd37123c607ca6485beb4542a792a4db765aa Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Thu, 18 Aug 2022 10:07:38 +0600 Subject: fscache: fix misdocumented parameter This patch fixes two warnings generated by make docs. The functions fscache_use_cookie and fscache_unuse_cookie, both have a parameter named cookie. But they are documented with the name "object" with unclear description. Which generates the warning when creating docs. This commit will replace the currently misdocumented parameter names with the correct ones while adding proper descriptions. CC: Randy Dunlap Signed-off-by: Khalid Masum Signed-off-by: David Howells Link: https://lore.kernel.org/r/20220521142446.4746-1-khalid.masum.92@gmail.com/ # v1 Link: https://lore.kernel.org/r/20220818040738.12036-1-khalid.masum.92@gmail.com/ # v2 Link: https://lore.kernel.org/r/880d7d25753fb326ee17ac08005952112fcf9bdb.1657360984.git.mchehab@kernel.org/ # Mauro's version --- include/linux/fscache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 720874e6ee94..36e5dd84cf59 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -258,7 +258,7 @@ struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, /** * fscache_use_cookie - Request usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @will_modify: If cache is expected to be modified locally * * Request usage of the cookie attached to an object. The caller should tell @@ -274,7 +274,7 @@ static inline void fscache_use_cookie(struct fscache_cookie *cookie, /** * fscache_unuse_cookie - Cease usage of cookie attached to an object - * @object: Object description + * @cookie: The cookie representing the cache object * @aux_data: Updated auxiliary data (or NULL) * @object_size: Revised size of the object (or NULL) * -- cgit v1.2.3 From 2555283eb40df89945557273121e9393ef9b542b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 31 Aug 2022 19:06:00 +0200 Subject: mm/rmap: Fix anon_vma->degree ambiguity leading to double-reuse anon_vma->degree tracks the combined number of child anon_vmas and VMAs that use the anon_vma as their ->anon_vma. anon_vma_clone() then assumes that for any anon_vma attached to src->anon_vma_chain other than src->anon_vma, it is impossible for it to be a leaf node of the VMA tree, meaning that for such VMAs ->degree is elevated by 1 because of a child anon_vma, meaning that if ->degree equals 1 there are no VMAs that use the anon_vma as their ->anon_vma. This assumption is wrong because the ->degree optimization leads to leaf nodes being abandoned on anon_vma_clone() - an existing anon_vma is reused and no new parent-child relationship is created. So it is possible to reuse an anon_vma for one VMA while it is still tied to another VMA. This is an issue because is_mergeable_anon_vma() and its callers assume that if two VMAs have the same ->anon_vma, the list of anon_vmas attached to the VMAs is guaranteed to be the same. When this assumption is violated, vma_merge() can merge pages into a VMA that is not attached to the corresponding anon_vma, leading to dangling page->mapping pointers that will be dereferenced during rmap walks. Fix it by separately tracking the number of child anon_vmas and the number of VMAs using the anon_vma as their ->anon_vma. Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Cc: stable@kernel.org Acked-by: Michal Hocko Acked-by: Vlastimil Babka Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 7 +++++-- mm/rmap.c | 29 ++++++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf80adca980b..b89b4b86951f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -41,12 +41,15 @@ struct anon_vma { atomic_t refcount; /* - * Count of child anon_vmas and VMAs which points to this anon_vma. + * Count of child anon_vmas. Equals to the count of all anon_vmas that + * have ->parent pointing to this one, including itself. * * This counter is used for making decision about reusing anon_vma * instead of forking new one. See comments in function anon_vma_clone. */ - unsigned degree; + unsigned long num_children; + /* Count of VMAs whose ->anon_vma pointer points to this object. */ + unsigned long num_active_vmas; struct anon_vma *parent; /* Parent of this anon_vma */ diff --git a/mm/rmap.c b/mm/rmap.c index edc06c52bc82..93d5a6f793d2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -93,7 +93,8 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); - anon_vma->degree = 1; /* Reference for first vma */ + anon_vma->num_children = 0; + anon_vma->num_active_vmas = 0; anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called @@ -201,6 +202,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) goto out_enomem_free_avc; + anon_vma->num_children++; /* self-parent link for new root */ allocated = anon_vma; } @@ -210,8 +212,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); - /* vma reference or self-parent link for new root */ - anon_vma->degree++; + anon_vma->num_active_vmas++; allocated = NULL; avc = NULL; } @@ -296,19 +297,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) anon_vma_chain_link(dst, avc, anon_vma); /* - * Reuse existing anon_vma if its degree lower than two, - * that means it has no vma and only one anon_vma child. + * Reuse existing anon_vma if it has no vma and only one + * anon_vma child. * - * Do not choose parent anon_vma, otherwise first child - * will always reuse it. Root anon_vma is never reused: + * Root anon_vma is never reused: * it has self-parent reference and at least one child. */ if (!dst->anon_vma && src->anon_vma && - anon_vma != src->anon_vma && anon_vma->degree < 2) + anon_vma->num_children < 2 && + anon_vma->num_active_vmas == 0) dst->anon_vma = anon_vma; } if (dst->anon_vma) - dst->anon_vma->degree++; + dst->anon_vma->num_active_vmas++; unlock_anon_vma_root(root); return 0; @@ -358,6 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) anon_vma = anon_vma_alloc(); if (!anon_vma) goto out_error; + anon_vma->num_active_vmas++; avc = anon_vma_chain_alloc(GFP_KERNEL); if (!avc) goto out_error_free_anon_vma; @@ -378,7 +380,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); - anon_vma->parent->degree++; + anon_vma->parent->num_children++; anon_vma_unlock_write(anon_vma); return 0; @@ -410,7 +412,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * to free them outside the lock. */ if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { - anon_vma->parent->degree--; + anon_vma->parent->num_children--; continue; } @@ -418,7 +420,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) anon_vma_chain_free(avc); } if (vma->anon_vma) { - vma->anon_vma->degree--; + vma->anon_vma->num_active_vmas--; /* * vma would still be needed after unlink, and anon_vma will be prepared @@ -436,7 +438,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; - VM_WARN_ON(anon_vma->degree); + VM_WARN_ON(anon_vma->num_children); + VM_WARN_ON(anon_vma->num_active_vmas); put_anon_vma(anon_vma); list_del(&avc->same_vma); -- cgit v1.2.3 From 79e3602caa6f9d59c4f66a268407080496dae408 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2022 11:56:56 -0700 Subject: tcp: make global challenge ack rate limitation per net-ns and default disabled Because per host rate limiting has been proven problematic (side channel attacks can be based on it), per host rate limiting of challenge acks ideally should be per netns and turned off by default. This is a long due followup of following commits: 083ae308280d ("tcp: enable per-socket rate limiting of all 'challenge acks'") f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock") 75ff39ccc1bd ("tcp: make challenge acks less predictable") Signed-off-by: Eric Dumazet Cc: Jason Baron Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 5 ++++- include/net/netns/ipv4.h | 2 ++ net/ipv4/tcp_input.c | 21 +++++++++++---------- net/ipv4/tcp_ipv4.c | 6 ++++-- 4 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 56cd4ea059b2..a759872a2883 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1035,7 +1035,10 @@ tcp_limit_output_bytes - INTEGER tcp_challenge_ack_limit - INTEGER Limits number of Challenge ACK sent per second, as recommended in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) - Default: 1000 + Note that this per netns rate limit can allow some side channel + attacks and probably should not be enabled. + TCP stack implements per TCP socket limits anyway. + Default: INT_MAX (unlimited) UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c7320ef356d9..6320a76cefdc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -179,6 +179,8 @@ struct netns_ipv4 { unsigned int sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; + u32 tcp_challenge_timestamp; + u32 tcp_challenge_count; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c184e15397a2..b85a9f755da4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3614,12 +3614,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); - u32 count, now; + u32 count, now, ack_limit; /* First check our per-socket dupack rate limit. */ if (__tcp_oow_rate_limited(net, @@ -3627,18 +3624,22 @@ static void tcp_send_challenge_ack(struct sock *sk) &tp->last_oow_ack_time)) return; + ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (ack_limit == INT_MAX) + goto send_ack; + /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != READ_ONCE(challenge_timestamp)) { - u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) { u32 half = (ack_limit + 1) >> 1; - WRITE_ONCE(challenge_timestamp, now); - WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); + WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now); + WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit)); } - count = READ_ONCE(challenge_count); + count = READ_ONCE(net->ipv4.tcp_challenge_count); if (count > 0) { - WRITE_ONCE(challenge_count, count - 1); + WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1); +send_ack: NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c83780dc9bf..5b019ba2b9d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3139,8 +3139,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tso_win_divisor = 3; /* Default TSQ limit of 16 TSO segments */ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536; - /* rfc5961 challenge ack rate limiting */ - net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + + /* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. */ + net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX; + net->ipv4.sysctl_tcp_min_tso_segs = 2; net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */ net->ipv4.sysctl_tcp_min_rtt_wlen = 300; -- cgit v1.2.3 From 23c12d5fc02fb0712c64f3e87a27fcfa78e8af9c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:01 +0100 Subject: Revert "io_uring: add zc notification flush requests" This reverts commit 492dddb4f6e3a5839c27d41ff1fecdbe6c3ab851. Soon we won't have the very notion of notification flushing, so remove notification flushing requests. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8850334ca56e65b413cb34fd158db81d7b2865a3.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 - io_uring/rsrc.c | 38 -------------------------------------- 2 files changed, 39 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9e0b5c8d92ce..18ae5caf1773 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -301,7 +301,6 @@ enum io_uring_op { */ enum { IORING_RSRC_UPDATE_FILES, - IORING_RSRC_UPDATE_NOTIF, }; /* diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 71359a4d0bd4..048f7483fe8a 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -15,7 +15,6 @@ #include "io_uring.h" #include "openclose.h" #include "rsrc.h" -#include "notif.h" struct io_rsrc_update { struct file *file; @@ -741,41 +740,6 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static int io_notif_update(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); - struct io_ring_ctx *ctx = req->ctx; - unsigned len = up->nr_args; - unsigned idx_end, idx = up->offset; - int ret = 0; - - io_ring_submit_lock(ctx, issue_flags); - if (unlikely(check_add_overflow(idx, len, &idx_end))) { - ret = -EOVERFLOW; - goto out; - } - if (unlikely(idx_end > ctx->nr_notif_slots)) { - ret = -EINVAL; - goto out; - } - - for (; idx < idx_end; idx++) { - struct io_notif_slot *slot = &ctx->notif_slots[idx]; - - if (!slot->notif) - continue; - if (up->arg) - slot->tag = up->arg; - io_notif_slot_flush_submit(slot, issue_flags); - } -out: - io_ring_submit_unlock(ctx, issue_flags); - if (ret < 0) - req_set_fail(req); - io_req_set_res(req, ret, 0); - return IOU_OK; -} - int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); @@ -783,8 +747,6 @@ int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) switch (up->type) { case IORING_RSRC_UPDATE_FILES: return io_files_update(req, issue_flags); - case IORING_RSRC_UPDATE_NOTIF: - return io_notif_update(req, issue_flags); } return -EINVAL; } -- cgit v1.2.3 From d9808ceb3129b811becebdee3ec96d189c83e56c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:02 +0100 Subject: Revert "io_uring: rename IORING_OP_FILES_UPDATE" This reverts commit 4379d5f15b3fd4224c37841029178aa8082a242e. We removed notification flushing, also cleanup uapi preparation changes to not pollute it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/89edc3905350f91e1b6e26d9dbf42ee44fd451a2.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +----------- io_uring/opdef.c | 9 ++++----- io_uring/rsrc.c | 17 ++--------------- io_uring/rsrc.h | 4 ++-- 4 files changed, 9 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 18ae5caf1773..111b651366bd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -178,8 +178,7 @@ enum io_uring_op { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, - IORING_OP_RSRC_UPDATE, - IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, + IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, @@ -228,7 +227,6 @@ enum io_uring_op { #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) - /* * sqe->splice_flags * extends splice(2) flags @@ -295,14 +293,6 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) - -/* - * IORING_OP_RSRC_UPDATE flags - */ -enum { - IORING_RSRC_UPDATE_FILES, -}; - /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 41410126c1c6..10b301ccf5cd 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -246,13 +246,12 @@ const struct io_op_def io_op_defs[] = { .prep = io_close_prep, .issue = io_close, }, - [IORING_OP_RSRC_UPDATE] = { + [IORING_OP_FILES_UPDATE] = { .audit_skip = 1, .iopoll = 1, - .name = "RSRC_UPDATE", - .prep = io_rsrc_update_prep, - .issue = io_rsrc_update, - .ioprio = 1, + .name = "FILES_UPDATE", + .prep = io_files_update_prep, + .issue = io_files_update, }, [IORING_OP_STATX] = { .audit_skip = 1, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 048f7483fe8a..cf3272113214 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -21,7 +21,6 @@ struct io_rsrc_update { u64 arg; u32 nr_args; u32 offset; - int type; }; static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, @@ -654,7 +653,7 @@ __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; } -int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); @@ -668,7 +667,6 @@ int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (!up->nr_args) return -EINVAL; up->arg = READ_ONCE(sqe->addr); - up->type = READ_ONCE(sqe->ioprio); return 0; } @@ -711,7 +709,7 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, return ret; } -static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) +int io_files_update(struct io_kiocb *req, unsigned int issue_flags) { struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); struct io_ring_ctx *ctx = req->ctx; @@ -740,17 +738,6 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); - - switch (up->type) { - case IORING_RSRC_UPDATE_FILES: - return io_files_update(req, issue_flags); - } - return -EINVAL; -} - int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, struct io_rsrc_node *node, void *rsrc) { diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index f3a9a177941f..9bce15665444 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -167,8 +167,8 @@ static inline u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) return &data->tags[table_idx][off]; } -int io_rsrc_update(struct io_kiocb *req, unsigned int issue_flags); -int io_rsrc_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_files_update(struct io_kiocb *req, unsigned int issue_flags); +int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int __io_account_mem(struct user_struct *user, unsigned long nr_pages); -- cgit v1.2.3 From 57f332246afa5929bdf2e7a5facddedb43549be4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:03 +0100 Subject: io_uring/notif: remove notif registration We're going to remove the userspace exposed zerocopy notification API, remove notification registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6ff00b97be99869c386958a990593c9c31cf105b.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ----- io_uring/io_uring.c | 10 ------ io_uring/net.c | 4 +-- io_uring/notif.c | 71 ------------------------------------------- io_uring/notif.h | 11 ------- 5 files changed, 1 insertion(+), 103 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 111b651366bd..b11c57b0ebb5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -279,14 +279,10 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. - * - * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful - * successful. Only for zerocopy sends. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) -#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) /* * accept flags stored in sqe->ioprio @@ -474,10 +470,6 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, - /* zerocopy notification API */ - IORING_REGISTER_NOTIFIERS = 26, - IORING_UNREGISTER_NOTIFIERS = 27, - /* this goes last */ IORING_REGISTER_LAST }; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 77616279000b..c2e06a3aa18d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2640,7 +2640,6 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_unregister_personality(ctx, index); if (ctx->rings) io_poll_remove_all(ctx, NULL, true); - io_notif_unregister(ctx); mutex_unlock(&ctx->uring_lock); /* failed during ring init, it couldn't have issued any requests */ @@ -3839,15 +3838,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_file_alloc_range(ctx, arg); break; - case IORING_REGISTER_NOTIFIERS: - ret = io_notif_register(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_NOTIFIERS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_notif_unregister(ctx); - break; default: ret = -EINVAL; break; diff --git a/io_uring/net.c b/io_uring/net.c index 7a5468cc905e..aac6997b7d88 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -889,7 +889,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->flags = READ_ONCE(sqe->ioprio); if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH)) + IORING_RECVSEND_FIXED_BUF)) return -EINVAL; if (zc->flags & IORING_RECVSEND_FIXED_BUF) { unsigned idx = READ_ONCE(sqe->buf_index); @@ -1063,8 +1063,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); - } else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) { - io_notif_slot_flush_submit(notif_slot, 0); } if (ret >= 0) diff --git a/io_uring/notif.c b/io_uring/notif.c index 96f076b175e0..11f45640684a 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -86,74 +86,3 @@ void io_notif_slot_flush(struct io_notif_slot *slot) io_req_task_work_add(notif); } } - -__cold int io_notif_unregister(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - int i; - - if (!ctx->notif_slots) - return -ENXIO; - - for (i = 0; i < ctx->nr_notif_slots; i++) { - struct io_notif_slot *slot = &ctx->notif_slots[i]; - struct io_kiocb *notif = slot->notif; - struct io_notif_data *nd; - - if (!notif) - continue; - nd = io_notif_to_data(notif); - slot->notif = NULL; - if (!refcount_dec_and_test(&nd->uarg.refcnt)) - continue; - notif->io_task_work.func = __io_notif_complete_tw; - io_req_task_work_add(notif); - } - - kvfree(ctx->notif_slots); - ctx->notif_slots = NULL; - ctx->nr_notif_slots = 0; - return 0; -} - -__cold int io_notif_register(struct io_ring_ctx *ctx, - void __user *arg, unsigned int size) - __must_hold(&ctx->uring_lock) -{ - struct io_uring_notification_slot __user *slots; - struct io_uring_notification_slot slot; - struct io_uring_notification_register reg; - unsigned i; - - if (ctx->nr_notif_slots) - return -EBUSY; - if (size != sizeof(reg)) - return -EINVAL; - if (copy_from_user(®, arg, sizeof(reg))) - return -EFAULT; - if (!reg.nr_slots || reg.nr_slots > IORING_MAX_NOTIF_SLOTS) - return -EINVAL; - if (reg.resv || reg.resv2 || reg.resv3) - return -EINVAL; - - slots = u64_to_user_ptr(reg.data); - ctx->notif_slots = kvcalloc(reg.nr_slots, sizeof(ctx->notif_slots[0]), - GFP_KERNEL_ACCOUNT); - if (!ctx->notif_slots) - return -ENOMEM; - - for (i = 0; i < reg.nr_slots; i++, ctx->nr_notif_slots++) { - struct io_notif_slot *notif_slot = &ctx->notif_slots[i]; - - if (copy_from_user(&slot, &slots[i], sizeof(slot))) { - io_notif_unregister(ctx); - return -EFAULT; - } - if (slot.resv[0] | slot.resv[1] | slot.resv[2]) { - io_notif_unregister(ctx); - return -EINVAL; - } - notif_slot->tag = slot.tag; - } - return 0; -} diff --git a/io_uring/notif.h b/io_uring/notif.h index 80f6445e0c2b..8380eeff2f2e 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -8,7 +8,6 @@ #include "rsrc.h" #define IO_NOTIF_SPLICE_BATCH 32 -#define IORING_MAX_NOTIF_SLOTS (1U << 15) struct io_notif_data { struct file *file; @@ -36,10 +35,6 @@ struct io_notif_slot { u32 seq; }; -int io_notif_register(struct io_ring_ctx *ctx, - void __user *arg, unsigned int size); -int io_notif_unregister(struct io_ring_ctx *ctx); - void io_notif_slot_flush(struct io_notif_slot *slot); struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif_slot *slot); @@ -67,12 +62,6 @@ static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, return &ctx->notif_slots[idx]; } -static inline void io_notif_slot_flush_submit(struct io_notif_slot *slot, - unsigned int issue_flags) -{ - io_notif_slot_flush(slot); -} - static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len) { struct io_ring_ctx *ctx = notif->ctx; -- cgit v1.2.3 From b48c312be05e83b55a4d58bf61f80b4a3288fb7e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:04 +0100 Subject: io_uring/net: simplify zerocopy send user API Following user feedback, this patch simplifies zerocopy send API. One of the main complaints is that the current API is difficult with the userspace managing notification slots, and then send retries with error handling make it even worse. Instead of keeping notification slots change it to the per-request notifications model, which posts both completion and notification CQEs for each request when any data has been sent, and only one CQE if it fails. All notification CQEs will have IORING_CQE_F_NOTIF set and IORING_CQE_F_MORE in completion CQEs indicates whether to wait a notification or not. IOSQE_CQE_SKIP_SUCCESS is disallowed with zerocopy sends for now. This is less flexible, but greatly simplifies the user API and also the kernel implementation. We reuse notif helpers in this patch, but in the future there won't be need for keeping two requests. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/95287640ab98fc9417370afb16e310677c63e6ce.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 ++++-- io_uring/io_uring.c | 4 ++-- io_uring/net.c | 53 +++++++++++++++++++++++++++---------------- io_uring/net.h | 1 + io_uring/notif.c | 12 ++-------- io_uring/notif.h | 43 ++--------------------------------- io_uring/opdef.c | 3 ++- 7 files changed, 47 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b11c57b0ebb5..6b83177fd41d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -71,8 +71,8 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; struct { - __u16 notification_idx; __u16 addr_len; + __u16 __pad3[1]; }; }; union { @@ -205,7 +205,7 @@ enum io_uring_op { IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, - IORING_OP_SENDZC_NOTIF, + IORING_OP_SEND_ZC, /* this goes last, obviously */ IORING_OP_LAST, @@ -326,10 +326,13 @@ struct io_uring_cqe { * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv + * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct + * them from sends. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) +#define IORING_CQE_F_NOTIF (1U << 3) enum { IORING_CQE_BUFFER_SHIFT = 16, diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c2e06a3aa18d..f9be9b7eb654 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3923,8 +3923,8 @@ static int __init io_uring_init(void) BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); BUILD_BUG_SQE_ELEM(44, __u32, file_index); - BUILD_BUG_SQE_ELEM(44, __u16, notification_idx); - BUILD_BUG_SQE_ELEM(46, __u16, addr_len); + BUILD_BUG_SQE_ELEM(44, __u16, addr_len); + BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); BUILD_BUG_SQE_ELEM(56, __u64, __pad2); diff --git a/io_uring/net.c b/io_uring/net.c index aac6997b7d88..7047c1342541 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -65,12 +65,12 @@ struct io_sendzc { struct file *file; void __user *buf; size_t len; - u16 slot_idx; unsigned msg_flags; unsigned flags; unsigned addr_len; void __user *addr; size_t done_io; + struct io_kiocb *notif; }; #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) @@ -879,12 +879,26 @@ out_free: return ret; } +void io_sendzc_cleanup(struct io_kiocb *req) +{ + struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); + + zc->notif->flags |= REQ_F_CQE_SKIP; + io_notif_flush(zc->notif); + zc->notif = NULL; +} + int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *notif; - if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)) + if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) || + READ_ONCE(sqe->__pad3[0])) + return -EINVAL; + /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */ + if (req->flags & REQ_F_CQE_SKIP) return -EINVAL; zc->flags = READ_ONCE(sqe->ioprio); @@ -900,11 +914,17 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(req, ctx, 0); } + notif = zc->notif = io_alloc_notif(ctx); + if (!notif) + return -ENOMEM; + notif->cqe.user_data = req->cqe.user_data; + notif->cqe.res = 0; + notif->cqe.flags = IORING_CQE_F_NOTIF; + req->flags |= REQ_F_NEED_CLEANUP; zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); zc->len = READ_ONCE(sqe->len); zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; - zc->slot_idx = READ_ONCE(sqe->notification_idx); if (zc->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; @@ -976,33 +996,20 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) { struct sockaddr_storage __address, *addr = NULL; - struct io_ring_ctx *ctx = req->ctx; struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc); - struct io_notif_slot *notif_slot; - struct io_kiocb *notif; struct msghdr msg; struct iovec iov; struct socket *sock; - unsigned msg_flags; + unsigned msg_flags, cflags; int ret, min_ret = 0; if (!(req->flags & REQ_F_POLLED) && (zc->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; - - if (issue_flags & IO_URING_F_UNLOCKED) - return -EAGAIN; sock = sock_from_file(req->file); if (unlikely(!sock)) return -ENOTSOCK; - notif_slot = io_get_notif_slot(ctx, zc->slot_idx); - if (!notif_slot) - return -EINVAL; - notif = io_get_notif(ctx, notif_slot); - if (!notif) - return -ENOMEM; - msg.msg_name = NULL; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1033,7 +1040,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) &msg.msg_iter); if (unlikely(ret)) return ret; - ret = io_notif_account_mem(notif, zc->len); + ret = io_notif_account_mem(zc->notif, zc->len); if (unlikely(ret)) return ret; } @@ -1045,7 +1052,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); msg.msg_flags = msg_flags; - msg.msg_ubuf = &io_notif_to_data(notif)->uarg; + msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg; msg.sg_from_iter = io_sg_from_iter; ret = sock_sendmsg(sock, &msg); @@ -1060,6 +1067,8 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_addr(req, addr, issue_flags); } + if (ret < 0 && !zc->done_io) + zc->notif->flags |= REQ_F_CQE_SKIP; if (ret == -ERESTARTSYS) ret = -EINTR; req_set_fail(req); @@ -1069,7 +1078,11 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags) ret += zc->done_io; else if (zc->done_io) ret = zc->done_io; - io_req_set_res(req, ret, 0); + + io_notif_flush(zc->notif); + req->flags &= ~REQ_F_NEED_CLEANUP; + cflags = ret >= 0 ? IORING_CQE_F_MORE : 0; + io_req_set_res(req, ret, cflags); return IOU_OK; } diff --git a/io_uring/net.h b/io_uring/net.h index f91f56c6eeac..d744a0a874e7 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -55,6 +55,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags); int io_sendzc(struct io_kiocb *req, unsigned int issue_flags); int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +void io_sendzc_cleanup(struct io_kiocb *req); void io_netmsg_cache_free(struct io_cache_entry *entry); #else diff --git a/io_uring/notif.c b/io_uring/notif.c index 11f45640684a..38d77165edc3 100644 --- a/io_uring/notif.c +++ b/io_uring/notif.c @@ -42,8 +42,7 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, } } -struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot) +struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { struct io_kiocb *notif; @@ -59,27 +58,20 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, io_get_task_refs(1); notif->rsrc_node = NULL; io_req_set_rsrc_node(notif, ctx, 0); - notif->cqe.user_data = slot->tag; - notif->cqe.flags = slot->seq++; - notif->cqe.res = 0; nd = io_notif_to_data(notif); nd->account_pages = 0; nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; nd->uarg.callback = io_uring_tx_zerocopy_callback; - /* master ref owned by io_notif_slot, will be dropped on flush */ refcount_set(&nd->uarg.refcnt, 1); return notif; } -void io_notif_slot_flush(struct io_notif_slot *slot) +void io_notif_flush(struct io_kiocb *notif) __must_hold(&slot->notif->ctx->uring_lock) { - struct io_kiocb *notif = slot->notif; struct io_notif_data *nd = io_notif_to_data(notif); - slot->notif = NULL; - /* drop slot's master ref */ if (refcount_dec_and_test(&nd->uarg.refcnt)) { notif->io_task_work.func = __io_notif_complete_tw; diff --git a/io_uring/notif.h b/io_uring/notif.h index 8380eeff2f2e..5b4d710c8ca5 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -15,53 +15,14 @@ struct io_notif_data { unsigned long account_pages; }; -struct io_notif_slot { - /* - * Current/active notifier. A slot holds only one active notifier at a - * time and keeps one reference to it. Flush releases the reference and - * lazily replaces it with a new notifier. - */ - struct io_kiocb *notif; - - /* - * Default ->user_data for this slot notifiers CQEs - */ - u64 tag; - /* - * Notifiers of a slot live in generations, we create a new notifier - * only after flushing the previous one. Track the sequential number - * for all notifiers and copy it into notifiers's cqe->cflags - */ - u32 seq; -}; - -void io_notif_slot_flush(struct io_notif_slot *slot); -struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot); +void io_notif_flush(struct io_kiocb *notif); +struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx); static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif) { return io_kiocb_to_cmd(notif, struct io_notif_data); } -static inline struct io_kiocb *io_get_notif(struct io_ring_ctx *ctx, - struct io_notif_slot *slot) -{ - if (!slot->notif) - slot->notif = io_alloc_notif(ctx, slot); - return slot->notif; -} - -static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx, - unsigned idx) - __must_hold(&ctx->uring_lock) -{ - if (idx >= ctx->nr_notif_slots) - return NULL; - idx = array_index_nospec(idx, ctx->nr_notif_slots); - return &ctx->notif_slots[idx]; -} - static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len) { struct io_ring_ctx *ctx = notif->ctx; diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 10b301ccf5cd..c61494e0a602 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -470,7 +470,7 @@ const struct io_op_def io_op_defs[] = { .issue = io_uring_cmd, .prep_async = io_uring_cmd_prep_async, }, - [IORING_OP_SENDZC_NOTIF] = { + [IORING_OP_SEND_ZC] = { .name = "SENDZC_NOTIF", .needs_file = 1, .unbound_nonreg_file = 1, @@ -483,6 +483,7 @@ const struct io_op_def io_op_defs[] = { .prep = io_sendzc_prep, .issue = io_sendzc, .prep_async = io_sendzc_prep_async, + .cleanup = io_sendzc_cleanup, #else .prep = io_eopnotsupp_prep, #endif -- cgit v1.2.3 From 6f7a71f804287a7566314ab1a73d8ca2c18ca0d7 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 13 Sep 2022 14:34:54 +0200 Subject: regulator: Add driver for MT6331 PMIC regulators Add a driver for the regulators found in the MT6331 PMIC. This PMIC features six buck and 21 Low DropOut (LDO) regulators. Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220913123456.384513-3-angelogioacchino.delregno@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 + drivers/regulator/Makefile | 1 + drivers/regulator/mt6331-regulator.c | 507 +++++++++++++++++++++++++++++ include/linux/regulator/mt6331-regulator.h | 46 +++ 4 files changed, 563 insertions(+) create mode 100644 drivers/regulator/mt6331-regulator.c create mode 100644 include/linux/regulator/mt6331-regulator.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 23e3e4a35cc9..3afeb01446fb 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -787,6 +787,15 @@ config REGULATOR_MT6323 This driver supports the control of different power rails of device through regulator interface. +config REGULATOR_MT6331 + tristate "MediaTek MT6331 PMIC" + depends on MFD_MT6397 + help + Say y here to select this option to enable the power regulator of + MediaTek MT6331 PMIC. + This driver supports the control of different power rails of device + through regulator interface + config REGULATOR_MT6358 tristate "MediaTek MT6358 PMIC" depends on MFD_MT6397 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index fa49bb6cc544..1d9b2651020e 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_REGULATOR_MPQ7920) += mpq7920.o obj-$(CONFIG_REGULATOR_MT6311) += mt6311-regulator.o obj-$(CONFIG_REGULATOR_MT6315) += mt6315-regulator.o obj-$(CONFIG_REGULATOR_MT6323) += mt6323-regulator.o +obj-$(CONFIG_REGULATOR_MT6331) += mt6331-regulator.o obj-$(CONFIG_REGULATOR_MT6358) += mt6358-regulator.o obj-$(CONFIG_REGULATOR_MT6359) += mt6359-regulator.o obj-$(CONFIG_REGULATOR_MT6360) += mt6360-regulator.o diff --git a/drivers/regulator/mt6331-regulator.c b/drivers/regulator/mt6331-regulator.c new file mode 100644 index 000000000000..56be9a3a84ab --- /dev/null +++ b/drivers/regulator/mt6331-regulator.c @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (c) 2022 Collabora Ltd. +// Author: AngeloGioacchino Del Regno +// +// Based on mt6323-regulator.c, +// Copyright (c) 2016 MediaTek Inc. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MT6331_LDO_MODE_NORMAL 0 +#define MT6331_LDO_MODE_LP 1 + +/* + * MT6331 regulators information + * + * @desc: standard fields of regulator description. + * @qi: Mask for query enable signal status of regulators + * @vselon_reg: Register sections for hardware control mode of bucks + * @vselctrl_reg: Register for controlling the buck control mode. + * @vselctrl_mask: Mask for query buck's voltage control mode. + * @status_reg: Register for regulator enable status where qi unavailable + * @status_mask: Mask for querying regulator enable status + */ +struct mt6331_regulator_info { + struct regulator_desc desc; + u32 qi; + u32 vselon_reg; + u32 vselctrl_reg; + u32 vselctrl_mask; + u32 modeset_reg; + u32 modeset_mask; + u32 status_reg; + u32 status_mask; +}; + +#define MT6331_BUCK(match, vreg, min, max, step, volt_ranges, enreg, \ + vosel, vosel_mask, voselon, vosel_ctrl) \ +[MT6331_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6331_volt_range_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6331_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = (max - min)/step + 1, \ + .linear_ranges = volt_ranges, \ + .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(0), \ + }, \ + .qi = BIT(13), \ + .vselon_reg = voselon, \ + .vselctrl_reg = vosel_ctrl, \ + .vselctrl_mask = BIT(1), \ + .status_mask = 0, \ +} + +#define MT6331_LDO_AO(match, vreg, ldo_volt_table, vosel, vosel_mask) \ +[MT6331_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6331_volt_table_ao_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6331_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + }, \ +} + +#define MT6331_LDO_S(match, vreg, ldo_volt_table, enreg, enbit, vosel, \ + vosel_mask, _modeset_reg, _modeset_mask, \ + _status_reg, _status_mask) \ +[MT6331_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6331_volt_table_no_qi_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6331_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + }, \ + .modeset_reg = _modeset_reg, \ + .modeset_mask = _modeset_mask, \ + .status_reg = _status_reg, \ + .status_mask = _status_mask, \ +} + +#define MT6331_LDO(match, vreg, ldo_volt_table, enreg, enbit, vosel, \ + vosel_mask, _modeset_reg, _modeset_mask) \ +[MT6331_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = (_modeset_reg ? \ + &mt6331_volt_table_ops : \ + &mt6331_volt_table_no_ms_ops), \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6331_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + }, \ + .qi = BIT(15), \ + .modeset_reg = _modeset_reg, \ + .modeset_mask = _modeset_mask, \ +} + +#define MT6331_REG_FIXED(match, vreg, enreg, enbit, qibit, volt, \ + _modeset_reg, _modeset_mask) \ +[MT6331_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = (_modeset_reg ? \ + &mt6331_volt_fixed_ops : \ + &mt6331_volt_fixed_no_ms_ops), \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6331_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = 1, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + .min_uV = volt, \ + }, \ + .qi = BIT(qibit), \ + .modeset_reg = _modeset_reg, \ + .modeset_mask = _modeset_mask, \ +} + +static const struct linear_range buck_volt_range[] = { + REGULATOR_LINEAR_RANGE(700000, 0, 0x7f, 6250), +}; + +static const unsigned int ldo_volt_table1[] = { + 2800000, 3000000, 0, 3200000 +}; + +static const unsigned int ldo_volt_table2[] = { + 1500000, 1800000, 2500000, 2800000, +}; + +static const unsigned int ldo_volt_table3[] = { + 1200000, 1300000, 1500000, 1800000, 2000000, 2800000, 3000000, 3300000, +}; + +static const unsigned int ldo_volt_table4[] = { + 0, 0, 1700000, 1800000, 1860000, 2760000, 3000000, 3100000, +}; + +static const unsigned int ldo_volt_table5[] = { + 1800000, 3300000, 1800000, 3300000, +}; + +static const unsigned int ldo_volt_table6[] = { + 3000000, 3300000, +}; + +static const unsigned int ldo_volt_table7[] = { + 1200000, 1600000, 1700000, 1800000, 1900000, 2000000, 2100000, 2200000, +}; + +static const unsigned int ldo_volt_table8[] = { + 900000, 1000000, 1100000, 1220000, 1300000, 1500000, 1500000, 1500000, +}; + +static const unsigned int ldo_volt_table9[] = { + 1000000, 1050000, 1100000, 1150000, 1200000, 1250000, 1300000, 1300000, +}; + +static const unsigned int ldo_volt_table10[] = { + 1200000, 1300000, 1500000, 1800000, +}; + +static const unsigned int ldo_volt_table11[] = { + 1200000, 1300000, 1400000, 1500000, 1600000, 1700000, 1800000, 1800000, +}; + +static int mt6331_get_status(struct regulator_dev *rdev) +{ + struct mt6331_regulator_info *info = rdev_get_drvdata(rdev); + u32 regval; + int ret; + + ret = regmap_read(rdev->regmap, info->desc.enable_reg, ®val); + if (ret != 0) { + dev_err(&rdev->dev, "Failed to get enable reg: %d\n", ret); + return ret; + } + + return (regval & info->qi) ? REGULATOR_STATUS_ON : REGULATOR_STATUS_OFF; +} + +static int mt6331_ldo_set_mode(struct regulator_dev *rdev, unsigned int mode) +{ + struct mt6331_regulator_info *info = rdev_get_drvdata(rdev); + int val; + + switch (mode) { + case REGULATOR_MODE_STANDBY: + val = MT6331_LDO_MODE_LP; + break; + case REGULATOR_MODE_NORMAL: + val = MT6331_LDO_MODE_NORMAL; + break; + default: + return -EINVAL; + } + + val <<= ffs(info->modeset_mask) - 1; + + return regmap_update_bits(rdev->regmap, info->modeset_reg, + info->modeset_mask, val); +} + +static unsigned int mt6331_ldo_get_mode(struct regulator_dev *rdev) +{ + struct mt6331_regulator_info *info = rdev_get_drvdata(rdev); + unsigned int val; + int ret; + + ret = regmap_read(rdev->regmap, info->modeset_reg, &val); + if (ret < 0) + return ret; + + val &= info->modeset_mask; + val >>= ffs(info->modeset_mask) - 1; + + return (val & BIT(0)) ? REGULATOR_MODE_STANDBY : REGULATOR_MODE_NORMAL; +} + +static const struct regulator_ops mt6331_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6331_get_status, +}; + +static const struct regulator_ops mt6331_volt_table_no_ms_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6331_get_status, +}; + +static const struct regulator_ops mt6331_volt_table_no_qi_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .set_mode = mt6331_ldo_set_mode, + .get_mode = mt6331_ldo_get_mode, +}; + +static const struct regulator_ops mt6331_volt_table_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6331_get_status, + .set_mode = mt6331_ldo_set_mode, + .get_mode = mt6331_ldo_get_mode, +}; + +static const struct regulator_ops mt6331_volt_table_ao_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, +}; + +static const struct regulator_ops mt6331_volt_fixed_no_ms_ops = { + .list_voltage = regulator_list_voltage_linear, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6331_get_status, +}; + +static const struct regulator_ops mt6331_volt_fixed_ops = { + .list_voltage = regulator_list_voltage_linear, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6331_get_status, + .set_mode = mt6331_ldo_set_mode, + .get_mode = mt6331_ldo_get_mode, +}; + +/* The array is indexed by id(MT6331_ID_XXX) */ +static struct mt6331_regulator_info mt6331_regulators[] = { + MT6331_BUCK("buck-vdvfs11", VDVFS11, 700000, 1493750, 6250, + buck_volt_range, MT6331_VDVFS11_CON9, + MT6331_VDVFS11_CON11, GENMASK(6, 0), + MT6331_VDVFS11_CON12, MT6331_VDVFS11_CON7), + MT6331_BUCK("buck-vdvfs12", VDVFS12, 700000, 1493750, 6250, + buck_volt_range, MT6331_VDVFS12_CON9, + MT6331_VDVFS12_CON11, GENMASK(6, 0), + MT6331_VDVFS12_CON12, MT6331_VDVFS12_CON7), + MT6331_BUCK("buck-vdvfs13", VDVFS13, 700000, 1493750, 6250, + buck_volt_range, MT6331_VDVFS13_CON9, + MT6331_VDVFS13_CON11, GENMASK(6, 0), + MT6331_VDVFS13_CON12, MT6331_VDVFS13_CON7), + MT6331_BUCK("buck-vdvfs14", VDVFS14, 700000, 1493750, 6250, + buck_volt_range, MT6331_VDVFS14_CON9, + MT6331_VDVFS14_CON11, GENMASK(6, 0), + MT6331_VDVFS14_CON12, MT6331_VDVFS14_CON7), + MT6331_BUCK("buck-vcore2", VCORE2, 700000, 1493750, 6250, + buck_volt_range, MT6331_VCORE2_CON9, + MT6331_VCORE2_CON11, GENMASK(6, 0), + MT6331_VCORE2_CON12, MT6331_VCORE2_CON7), + MT6331_REG_FIXED("buck-vio18", VIO18, MT6331_VIO18_CON9, 0, 13, 1800000, 0, 0), + MT6331_REG_FIXED("ldo-vrtc", VRTC, MT6331_DIGLDO_CON11, 8, 15, 2800000, 0, 0), + MT6331_REG_FIXED("ldo-vtcxo1", VTCXO1, MT6331_ANALDO_CON1, 10, 15, 2800000, + MT6331_ANALDO_CON1, GENMASK(1, 0)), + MT6331_REG_FIXED("ldo-vtcxo2", VTCXO2, MT6331_ANALDO_CON2, 10, 15, 2800000, + MT6331_ANALDO_CON2, GENMASK(1, 0)), + MT6331_REG_FIXED("ldo-vsram", VSRAM_DVFS1, MT6331_SYSLDO_CON4, 10, 15, 1012500, + MT6331_SYSLDO_CON4, GENMASK(1, 0)), + MT6331_REG_FIXED("ldo-vio28", VIO28, MT6331_DIGLDO_CON1, 10, 15, 2800000, + MT6331_DIGLDO_CON1, GENMASK(1, 0)), + MT6331_LDO("ldo-avdd32aud", AVDD32_AUD, ldo_volt_table1, MT6331_ANALDO_CON3, 10, + MT6331_ANALDO_CON10, GENMASK(6, 5), MT6331_ANALDO_CON3, GENMASK(1, 0)), + MT6331_LDO("ldo-vauxa32", VAUXA32, ldo_volt_table1, MT6331_ANALDO_CON4, 10, + MT6331_ANALDO_CON6, GENMASK(6, 5), MT6331_ANALDO_CON4, GENMASK(1, 0)), + MT6331_LDO("ldo-vemc33", VEMC33, ldo_volt_table6, MT6331_DIGLDO_CON5, 10, + MT6331_DIGLDO_CON17, BIT(6), MT6331_DIGLDO_CON5, GENMASK(1, 0)), + MT6331_LDO("ldo-vibr", VIBR, ldo_volt_table3, MT6331_DIGLDO_CON12, 10, + MT6331_DIGLDO_CON20, GENMASK(6, 4), MT6331_DIGLDO_CON12, GENMASK(1, 0)), + MT6331_LDO("ldo-vmc", VMC, ldo_volt_table5, MT6331_DIGLDO_CON3, 10, + MT6331_DIGLDO_CON15, GENMASK(5, 4), MT6331_DIGLDO_CON3, GENMASK(1, 0)), + MT6331_LDO("ldo-vmch", VMCH, ldo_volt_table6, MT6331_DIGLDO_CON4, 10, + MT6331_DIGLDO_CON16, BIT(6), MT6331_DIGLDO_CON4, GENMASK(1, 0)), + MT6331_LDO("ldo-vmipi", VMIPI, ldo_volt_table3, MT6331_SYSLDO_CON5, 10, + MT6331_SYSLDO_CON13, GENMASK(5, 3), MT6331_SYSLDO_CON5, GENMASK(1, 0)), + MT6331_LDO("ldo-vsim1", VSIM1, ldo_volt_table4, MT6331_DIGLDO_CON8, 10, + MT6331_DIGLDO_CON21, GENMASK(6, 4), MT6331_DIGLDO_CON8, GENMASK(1, 0)), + MT6331_LDO("ldo-vsim2", VSIM2, ldo_volt_table4, MT6331_DIGLDO_CON9, 10, + MT6331_DIGLDO_CON22, GENMASK(6, 4), MT6331_DIGLDO_CON9, GENMASK(1, 0)), + MT6331_LDO("ldo-vusb10", VUSB10, ldo_volt_table9, MT6331_SYSLDO_CON2, 10, + MT6331_SYSLDO_CON10, GENMASK(5, 3), MT6331_SYSLDO_CON2, GENMASK(1, 0)), + MT6331_LDO("ldo-vcama", VCAMA, ldo_volt_table2, MT6331_ANALDO_CON5, 15, + MT6331_ANALDO_CON9, GENMASK(5, 4), 0, 0), + MT6331_LDO_S("ldo-vcamaf", VCAM_AF, ldo_volt_table3, MT6331_DIGLDO_CON2, 10, + MT6331_DIGLDO_CON14, GENMASK(6, 4), MT6331_DIGLDO_CON2, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(0)), + MT6331_LDO_S("ldo-vcamd", VCAMD, ldo_volt_table8, MT6331_SYSLDO_CON1, 15, + MT6331_SYSLDO_CON9, GENMASK(6, 4), MT6331_SYSLDO_CON1, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(11)), + MT6331_LDO_S("ldo-vcamio", VCAM_IO, ldo_volt_table10, MT6331_SYSLDO_CON3, 10, + MT6331_SYSLDO_CON11, GENMASK(4, 3), MT6331_SYSLDO_CON3, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(13)), + MT6331_LDO_S("ldo-vgp1", VGP1, ldo_volt_table3, MT6331_DIGLDO_CON6, 10, + MT6331_DIGLDO_CON19, GENMASK(6, 4), MT6331_DIGLDO_CON6, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(4)), + MT6331_LDO_S("ldo-vgp2", VGP2, ldo_volt_table10, MT6331_SYSLDO_CON6, 10, + MT6331_SYSLDO_CON14, GENMASK(4, 3), MT6331_SYSLDO_CON6, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(15)), + MT6331_LDO_S("ldo-vgp3", VGP3, ldo_volt_table10, MT6331_SYSLDO_CON7, 10, + MT6331_SYSLDO_CON15, GENMASK(4, 3), MT6331_SYSLDO_CON7, GENMASK(1, 0), + MT6331_EN_STATUS2, BIT(0)), + MT6331_LDO_S("ldo-vgp4", VGP4, ldo_volt_table7, MT6331_DIGLDO_CON7, 10, + MT6331_DIGLDO_CON18, GENMASK(6, 4), MT6331_DIGLDO_CON7, GENMASK(1, 0), + MT6331_EN_STATUS1, BIT(5)), + MT6331_LDO_AO("ldo-vdig18", VDIG18, ldo_volt_table11, + MT6331_DIGLDO_CON28, GENMASK(14, 12)), +}; + +static int mt6331_set_buck_vosel_reg(struct platform_device *pdev) +{ + struct mt6397_chip *mt6331 = dev_get_drvdata(pdev->dev.parent); + int i; + u32 regval; + + for (i = 0; i < MT6331_ID_VREG_MAX; i++) { + if (mt6331_regulators[i].vselctrl_reg) { + if (regmap_read(mt6331->regmap, + mt6331_regulators[i].vselctrl_reg, + ®val) < 0) { + dev_err(&pdev->dev, + "Failed to read buck ctrl\n"); + return -EIO; + } + + if (regval & mt6331_regulators[i].vselctrl_mask) { + mt6331_regulators[i].desc.vsel_reg = + mt6331_regulators[i].vselon_reg; + } + } + } + + return 0; +} + +static int mt6331_regulator_probe(struct platform_device *pdev) +{ + struct mt6397_chip *mt6331 = dev_get_drvdata(pdev->dev.parent); + struct regulator_config config = {}; + struct regulator_dev *rdev; + int i; + u32 reg_value; + + /* Query buck controller to select activated voltage register part */ + if (mt6331_set_buck_vosel_reg(pdev)) + return -EIO; + + /* Read PMIC chip revision to update constraints and voltage table */ + if (regmap_read(mt6331->regmap, MT6331_HWCID, ®_value) < 0) { + dev_err(&pdev->dev, "Failed to read Chip ID\n"); + return -EIO; + } + reg_value &= GENMASK(7, 0); + + dev_info(&pdev->dev, "Chip ID = 0x%x\n", reg_value); + + /* + * ChipID 0x10 is "MT6331 E1", has a different voltage table and + * it's currently not supported in this driver. Upon detection of + * this ID, refuse to register the regulators, as we will wrongly + * interpret the VSEL for this revision, potentially overvolting + * some device. + */ + if (reg_value == 0x10) { + dev_err(&pdev->dev, "Chip version not supported. Bailing out.\n"); + return -EINVAL; + } + + for (i = 0; i < MT6331_ID_VREG_MAX; i++) { + config.dev = &pdev->dev; + config.driver_data = &mt6331_regulators[i]; + config.regmap = mt6331->regmap; + rdev = devm_regulator_register(&pdev->dev, + &mt6331_regulators[i].desc, &config); + if (IS_ERR(rdev)) { + dev_err(&pdev->dev, "failed to register %s\n", + mt6331_regulators[i].desc.name); + return PTR_ERR(rdev); + } + } + return 0; +} + +static const struct platform_device_id mt6331_platform_ids[] = { + {"mt6331-regulator", 0}, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, mt6331_platform_ids); + +static struct platform_driver mt6331_regulator_driver = { + .driver = { + .name = "mt6331-regulator", + }, + .probe = mt6331_regulator_probe, + .id_table = mt6331_platform_ids, +}; + +module_platform_driver(mt6331_regulator_driver); + +MODULE_AUTHOR("AngeloGioacchino Del Regno "); +MODULE_DESCRIPTION("Regulator Driver for MediaTek MT6331 PMIC"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/mt6331-regulator.h b/include/linux/regulator/mt6331-regulator.h new file mode 100644 index 000000000000..2801a9879c14 --- /dev/null +++ b/include/linux/regulator/mt6331-regulator.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Collabora Ltd. + * Author: AngeloGioacchino Del Regno + */ + +#ifndef __LINUX_REGULATOR_MT6331_H +#define __LINUX_REGULATOR_MT6331_H + +enum { + /* BUCK */ + MT6331_ID_VDVFS11 = 0, + MT6331_ID_VDVFS12, + MT6331_ID_VDVFS13, + MT6331_ID_VDVFS14, + MT6331_ID_VCORE2, + MT6331_ID_VIO18, + /* LDO */ + MT6331_ID_VTCXO1, + MT6331_ID_VTCXO2, + MT6331_ID_AVDD32_AUD, + MT6331_ID_VAUXA32, + MT6331_ID_VCAMA, + MT6331_ID_VIO28, + MT6331_ID_VCAM_AF, + MT6331_ID_VMC, + MT6331_ID_VMCH, + MT6331_ID_VEMC33, + MT6331_ID_VGP1, + MT6331_ID_VSIM1, + MT6331_ID_VSIM2, + MT6331_ID_VMIPI, + MT6331_ID_VIBR, + MT6331_ID_VGP4, + MT6331_ID_VCAMD, + MT6331_ID_VUSB10, + MT6331_ID_VCAM_IO, + MT6331_ID_VSRAM_DVFS1, + MT6331_ID_VGP2, + MT6331_ID_VGP3, + MT6331_ID_VRTC, + MT6331_ID_VDIG18, + MT6331_ID_VREG_MAX +}; + +#endif /* __LINUX_REGULATOR_MT6331_H */ -- cgit v1.2.3 From 1cc5a52e873a4f9725eafe5aa9cd213b7b58e29e Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 13 Sep 2022 14:34:56 +0200 Subject: regulator: Add driver for MT6332 PMIC regulators Add a driver for the regulators found in the MT6332 PMICs, including six buck and four LDO regulators. Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220913123456.384513-5-angelogioacchino.delregno@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 + drivers/regulator/Makefile | 1 + drivers/regulator/mt6332-regulator.c | 422 +++++++++++++++++++++++++++++ include/linux/regulator/mt6332-regulator.h | 27 ++ 4 files changed, 459 insertions(+) create mode 100644 drivers/regulator/mt6332-regulator.c create mode 100644 include/linux/regulator/mt6332-regulator.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 3afeb01446fb..66593f627fc3 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -796,6 +796,15 @@ config REGULATOR_MT6331 This driver supports the control of different power rails of device through regulator interface +config REGULATOR_MT6332 + tristate "MediaTek MT6332 PMIC" + depends on MFD_MT6397 + help + Say y here to select this option to enable the power regulator of + MediaTek MT6332 PMIC. + This driver supports the control of different power rails of device + through regulator interface + config REGULATOR_MT6358 tristate "MediaTek MT6358 PMIC" depends on MFD_MT6397 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 1d9b2651020e..aada1f7d747b 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_REGULATOR_MT6311) += mt6311-regulator.o obj-$(CONFIG_REGULATOR_MT6315) += mt6315-regulator.o obj-$(CONFIG_REGULATOR_MT6323) += mt6323-regulator.o obj-$(CONFIG_REGULATOR_MT6331) += mt6331-regulator.o +obj-$(CONFIG_REGULATOR_MT6332) += mt6332-regulator.o obj-$(CONFIG_REGULATOR_MT6358) += mt6358-regulator.o obj-$(CONFIG_REGULATOR_MT6359) += mt6359-regulator.o obj-$(CONFIG_REGULATOR_MT6360) += mt6360-regulator.o diff --git a/drivers/regulator/mt6332-regulator.c b/drivers/regulator/mt6332-regulator.c new file mode 100644 index 000000000000..77a27d8127a3 --- /dev/null +++ b/drivers/regulator/mt6332-regulator.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Copyright (c) 2022 Collabora Ltd. +// Author: AngeloGioacchino Del Regno +// +// Based on mt6323-regulator.c, +// Copyright (c) 2016 MediaTek Inc. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MT6332_LDO_MODE_NORMAL 0 +#define MT6332_LDO_MODE_LP 1 + +/* + * MT6332 regulators information + * + * @desc: standard fields of regulator description. + * @qi: Mask for query enable signal status of regulators + * @vselon_reg: Register sections for hardware control mode of bucks + * @vselctrl_reg: Register for controlling the buck control mode. + * @vselctrl_mask: Mask for query buck's voltage control mode. + * @status_reg: Register for regulator enable status where qi unavailable + * @status_mask: Mask for querying regulator enable status + */ +struct mt6332_regulator_info { + struct regulator_desc desc; + u32 qi; + u32 vselon_reg; + u32 vselctrl_reg; + u32 vselctrl_mask; + u32 modeset_reg; + u32 modeset_mask; + u32 status_reg; + u32 status_mask; +}; + +#define MT6332_BUCK(match, vreg, min, max, step, volt_ranges, enreg, \ + vosel, vosel_mask, voselon, vosel_ctrl) \ +[MT6332_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6332_buck_volt_range_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6332_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = (max - min)/step + 1, \ + .linear_ranges = volt_ranges, \ + .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(0), \ + }, \ + .qi = BIT(13), \ + .vselon_reg = voselon, \ + .vselctrl_reg = vosel_ctrl, \ + .vselctrl_mask = BIT(1), \ + .status_mask = 0, \ +} + +#define MT6332_LDO_LINEAR(match, vreg, min, max, step, volt_ranges, \ + enreg, vosel, vosel_mask, voselon, \ + vosel_ctrl, _modeset_reg, _modeset_mask) \ +[MT6332_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6332_ldo_volt_range_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6332_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = (max - min)/step + 1, \ + .linear_ranges = volt_ranges, \ + .n_linear_ranges = ARRAY_SIZE(volt_ranges), \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(0), \ + }, \ + .qi = BIT(15), \ + .vselon_reg = voselon, \ + .vselctrl_reg = vosel_ctrl, \ + .vselctrl_mask = BIT(1), \ + .modeset_reg = _modeset_reg, \ + .modeset_mask = _modeset_mask, \ + .status_mask = 0, \ +} + +#define MT6332_LDO_AO(match, vreg, ldo_volt_table, vosel, vosel_mask) \ +[MT6332_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6332_volt_table_ao_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6332_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + }, \ +} + +#define MT6332_LDO(match, vreg, ldo_volt_table, enreg, enbit, vosel, \ + vosel_mask, _modeset_reg, _modeset_mask) \ +[MT6332_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6332_volt_table_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6332_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = ARRAY_SIZE(ldo_volt_table), \ + .volt_table = ldo_volt_table, \ + .vsel_reg = vosel, \ + .vsel_mask = vosel_mask, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + }, \ + .qi = BIT(15), \ + .modeset_reg = _modeset_reg, \ + .modeset_mask = _modeset_mask, \ + .status_mask = 0, \ +} + +#define MT6332_REG_FIXED(match, vreg, enreg, enbit, qibit, volt, stbit) \ +[MT6332_ID_##vreg] = { \ + .desc = { \ + .name = #vreg, \ + .of_match = of_match_ptr(match), \ + .ops = &mt6332_volt_fixed_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = MT6332_ID_##vreg, \ + .owner = THIS_MODULE, \ + .n_voltages = 1, \ + .enable_reg = enreg, \ + .enable_mask = BIT(enbit), \ + .min_uV = volt, \ + }, \ + .qi = BIT(qibit), \ + .status_reg = MT6332_EN_STATUS0, \ + .status_mask = BIT(stbit), \ +} + +static const struct linear_range boost_volt_range[] = { + REGULATOR_LINEAR_RANGE(3500000, 0, 0x7f, 31250), +}; + +static const struct linear_range buck_volt_range[] = { + REGULATOR_LINEAR_RANGE(700000, 0, 0x7f, 6250), +}; + +static const struct linear_range buck_pa_volt_range[] = { + REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000), +}; + +static const struct linear_range buck_rf_volt_range[] = { + REGULATOR_LINEAR_RANGE(1050000, 0, 0x7f, 9375), +}; + +static const unsigned int ldo_volt_table1[] = { + 2800000, 3000000, 0, 3200000 +}; + +static const unsigned int ldo_volt_table2[] = { + 1200000, 1300000, 1400000, 1500000, 1600000, 1700000, 1800000, 1800000, +}; + +static int mt6332_get_status(struct regulator_dev *rdev) +{ + struct mt6332_regulator_info *info = rdev_get_drvdata(rdev); + u32 reg, en_mask, regval; + int ret; + + if (info->qi > 0) { + reg = info->desc.enable_reg; + en_mask = info->qi; + } else { + reg = info->status_reg; + en_mask = info->status_mask; + } + + ret = regmap_read(rdev->regmap, reg, ®val); + if (ret != 0) { + dev_err(&rdev->dev, "Failed to get enable reg: %d\n", ret); + return ret; + } + + return (regval & en_mask) ? REGULATOR_STATUS_ON : REGULATOR_STATUS_OFF; +} + +static int mt6332_ldo_set_mode(struct regulator_dev *rdev, unsigned int mode) +{ + struct mt6332_regulator_info *info = rdev_get_drvdata(rdev); + int val; + + switch (mode) { + case REGULATOR_MODE_STANDBY: + val = MT6332_LDO_MODE_LP; + break; + case REGULATOR_MODE_NORMAL: + val = MT6332_LDO_MODE_NORMAL; + break; + default: + return -EINVAL; + } + + val <<= ffs(info->modeset_mask) - 1; + + return regmap_update_bits(rdev->regmap, info->modeset_reg, + info->modeset_mask, val); +} + +static unsigned int mt6332_ldo_get_mode(struct regulator_dev *rdev) +{ + struct mt6332_regulator_info *info = rdev_get_drvdata(rdev); + unsigned int val; + int ret; + + ret = regmap_read(rdev->regmap, info->modeset_reg, &val); + if (ret < 0) + return ret; + + val &= info->modeset_mask; + val >>= ffs(info->modeset_mask) - 1; + + return (val & BIT(0)) ? REGULATOR_MODE_STANDBY : REGULATOR_MODE_NORMAL; +} + +static const struct regulator_ops mt6332_buck_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6332_get_status, +}; + +static const struct regulator_ops mt6332_ldo_volt_range_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6332_get_status, + .set_mode = mt6332_ldo_set_mode, + .get_mode = mt6332_ldo_get_mode, +}; + +static const struct regulator_ops mt6332_volt_table_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6332_get_status, + .set_mode = mt6332_ldo_set_mode, + .get_mode = mt6332_ldo_get_mode, +}; + +static const struct regulator_ops mt6332_volt_table_ao_ops = { + .list_voltage = regulator_list_voltage_table, + .map_voltage = regulator_map_voltage_iterate, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, +}; + +static const struct regulator_ops mt6332_volt_fixed_ops = { + .list_voltage = regulator_list_voltage_linear, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .get_status = mt6332_get_status, +}; + +/* The array is indexed by id(MT6332_ID_XXX) */ +static struct mt6332_regulator_info mt6332_regulators[] = { + MT6332_BUCK("buck-vdram", VDRAM, 700000, 1493750, 6250, buck_volt_range, + MT6332_EN_STATUS0, MT6332_VDRAM_CON11, GENMASK(6, 0), + MT6332_VDRAM_CON12, MT6332_VDRAM_CON7), + MT6332_BUCK("buck-vdvfs2", VDVFS2, 700000, 1312500, 6250, buck_volt_range, + MT6332_VDVFS2_CON9, MT6332_VDVFS2_CON11, GENMASK(6, 0), + MT6332_VDVFS2_CON12, MT6332_VDVFS2_CON7), + MT6332_BUCK("buck-vpa", VPA, 500000, 3400000, 50000, buck_pa_volt_range, + MT6332_VPA_CON9, MT6332_VPA_CON11, GENMASK(5, 0), + MT6332_VPA_CON12, MT6332_VPA_CON7), + MT6332_BUCK("buck-vrf18a", VRF1, 1050000, 2240625, 9375, buck_rf_volt_range, + MT6332_VRF1_CON9, MT6332_VRF1_CON11, GENMASK(6, 0), + MT6332_VRF1_CON12, MT6332_VRF1_CON7), + MT6332_BUCK("buck-vrf18b", VRF2, 1050000, 2240625, 9375, buck_rf_volt_range, + MT6332_VRF2_CON9, MT6332_VRF2_CON11, GENMASK(6, 0), + MT6332_VRF2_CON12, MT6332_VRF2_CON7), + MT6332_BUCK("buck-vsbst", VSBST, 3500000, 7468750, 31250, boost_volt_range, + MT6332_VSBST_CON8, MT6332_VSBST_CON12, GENMASK(6, 0), + MT6332_VSBST_CON13, MT6332_VSBST_CON8), + MT6332_LDO("ldo-vauxb32", VAUXB32, ldo_volt_table1, MT6332_LDO_CON1, 10, + MT6332_LDO_CON9, GENMASK(6, 5), MT6332_LDO_CON1, GENMASK(1, 0)), + MT6332_REG_FIXED("ldo-vbif28", VBIF28, MT6332_LDO_CON2, 10, 0, 2800000, 1), + MT6332_REG_FIXED("ldo-vusb33", VUSB33, MT6332_LDO_CON3, 10, 0, 3300000, 2), + MT6332_LDO_LINEAR("ldo-vsram", VSRAM_DVFS2, 700000, 1493750, 6250, buck_volt_range, + MT6332_EN_STATUS0, MT6332_LDO_CON8, GENMASK(15, 9), + MT6332_VDVFS2_CON23, MT6332_VDVFS2_CON22, + MT6332_LDO_CON5, GENMASK(1, 0)), + MT6332_LDO_AO("ldo-vdig18", VDIG18, ldo_volt_table2, MT6332_LDO_CON12, GENMASK(11, 9)), +}; + +static int mt6332_set_buck_vosel_reg(struct platform_device *pdev) +{ + struct mt6397_chip *mt6332 = dev_get_drvdata(pdev->dev.parent); + int i; + u32 regval; + + for (i = 0; i < MT6332_ID_VREG_MAX; i++) { + if (mt6332_regulators[i].vselctrl_reg) { + if (regmap_read(mt6332->regmap, + mt6332_regulators[i].vselctrl_reg, + ®val) < 0) { + dev_err(&pdev->dev, + "Failed to read buck ctrl\n"); + return -EIO; + } + + if (regval & mt6332_regulators[i].vselctrl_mask) { + mt6332_regulators[i].desc.vsel_reg = + mt6332_regulators[i].vselon_reg; + } + } + } + + return 0; +} + +static int mt6332_regulator_probe(struct platform_device *pdev) +{ + struct mt6397_chip *mt6332 = dev_get_drvdata(pdev->dev.parent); + struct regulator_config config = {}; + struct regulator_dev *rdev; + int i; + u32 reg_value; + + /* Query buck controller to select activated voltage register part */ + if (mt6332_set_buck_vosel_reg(pdev)) + return -EIO; + + /* Read PMIC chip revision to update constraints and voltage table */ + if (regmap_read(mt6332->regmap, MT6332_HWCID, ®_value) < 0) { + dev_err(&pdev->dev, "Failed to read Chip ID\n"); + return -EIO; + } + reg_value &= GENMASK(7, 0); + + dev_info(&pdev->dev, "Chip ID = 0x%x\n", reg_value); + + /* + * ChipID 0x10 is "MT6332 E1", has a different voltage table and + * it's currently not supported in this driver. Upon detection of + * this ID, refuse to register the regulators, as we will wrongly + * interpret the VSEL for this revision, potentially overvolting + * some device. + */ + if (reg_value == 0x10) { + dev_err(&pdev->dev, "Chip version not supported. Bailing out.\n"); + return -EINVAL; + } + + for (i = 0; i < MT6332_ID_VREG_MAX; i++) { + config.dev = &pdev->dev; + config.driver_data = &mt6332_regulators[i]; + config.regmap = mt6332->regmap; + rdev = devm_regulator_register(&pdev->dev, + &mt6332_regulators[i].desc, &config); + if (IS_ERR(rdev)) { + dev_err(&pdev->dev, "failed to register %s\n", + mt6332_regulators[i].desc.name); + return PTR_ERR(rdev); + } + } + return 0; +} + +static const struct platform_device_id mt6332_platform_ids[] = { + {"mt6332-regulator", 0}, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, mt6332_platform_ids); + +static struct platform_driver mt6332_regulator_driver = { + .driver = { + .name = "mt6332-regulator", + }, + .probe = mt6332_regulator_probe, + .id_table = mt6332_platform_ids, +}; + +module_platform_driver(mt6332_regulator_driver); + +MODULE_AUTHOR("AngeloGioacchino Del Regno "); +MODULE_DESCRIPTION("Regulator Driver for MediaTek MT6332 PMIC"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/mt6332-regulator.h b/include/linux/regulator/mt6332-regulator.h new file mode 100644 index 000000000000..af5e3ed31029 --- /dev/null +++ b/include/linux/regulator/mt6332-regulator.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Collabora Ltd. + * Author: AngeloGioacchino Del Regno + */ + +#ifndef __LINUX_REGULATOR_MT6332_H +#define __LINUX_REGULATOR_MT6332_H + +enum { + /* BUCK */ + MT6332_ID_VDRAM = 0, + MT6332_ID_VDVFS2, + MT6332_ID_VPA, + MT6332_ID_VRF1, + MT6332_ID_VRF2, + MT6332_ID_VSBST, + /* LDO */ + MT6332_ID_VAUXB32, + MT6332_ID_VBIF28, + MT6332_ID_VDIG18, + MT6332_ID_VSRAM_DVFS2, + MT6332_ID_VUSB33, + MT6332_ID_VREG_MAX +}; + +#endif /* __LINUX_REGULATOR_MT6332_H */ -- cgit v1.2.3