summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2018-05-22 10:25:07 +0200
committer Daniel Borkmann <daniel@iogearbox.net> 2018-05-22 10:25:08 +0200
commit fd0bfa8d6e046237cb591bd3b51c87c56d4772b2 (patch)
tree 12259d05c54986563fb2696dd85b48fc5c94f686
parent d849f9f9768cf9ba6d68a395c67065522625b27c (diff)
parent d3b42f1422d9c050bf5a2c660c045af2ab5d3e72 (diff)
download lwn-fd0bfa8d6e046237cb591bd3b51c87c56d4772b2.tar.gz
lwn-fd0bfa8d6e046237cb591bd3b51c87c56d4772b2.zip
Merge branch 'bpf-af-xdp-cleanups'
Björn Töpel says: ==================== This is the second follow-up set. The first four patches are uapi changes: * Removing rebind support * Getting rid of structure hole * Removing explicit cache line alignment * Stricter bind checks The last patches do some cleanups, where the umem and refcount_t changes were suggested by Daniel. * Add a missing write-barrier and use READ_ONCE for data-dependencies * Clean up umem and do proper locking * Convert atomic_t to refcount_t ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- include/uapi/linux/if_xdp.h 46
-rw-r--r-- net/xdp/xdp_umem.c 85
-rw-r--r-- net/xdp/xdp_umem.h 5
-rw-r--r-- net/xdp/xsk.c 105
-rw-r--r-- net/xdp/xsk_queue.h 17
-rw-r--r-- samples/bpf/xdpsock_user.c 123
6 files changed, 225 insertions, 156 deletions
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 56db977221d2..4737cfe222f5 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -17,19 +17,33 @@
struct sockaddr_xdp {
__u16 sxdp_family;
+ __u16 sxdp_flags;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
- __u16 sxdp_flags;
+};
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
};
/* XDP socket options */
-#define XDP_RX_RING 1
-#define XDP_TX_RING 2
-#define XDP_UMEM_REG 3
-#define XDP_UMEM_FILL_RING 4
-#define XDP_UMEM_COMPLETION_RING 5
-#define XDP_STATISTICS 6
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -50,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
+/* Rx/Tx descriptor */
struct xdp_desc {
__u32 idx;
__u32 len;
@@ -58,21 +73,6 @@ struct xdp_desc {
__u8 padding[5];
};
-struct xdp_ring {
- __u32 producer __attribute__((aligned(64)));
- __u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
- struct xdp_ring ptrs;
- struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
- struct xdp_ring ptrs;
- __u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
#endif /* _LINUX_IF_XDP_H */
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index c47909c74899..87998818116f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -16,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048
-int xdp_umem_create(struct xdp_umem **umem)
-{
- *umem = kzalloc(sizeof(**umem), GFP_KERNEL);
-
- if (!*umem)
- return -ENOMEM;
-
- return 0;
-}
-
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unsigned int i;
- if (umem->pgs) {
- for (i = 0; i < umem->npgs; i++) {
- struct page *page = umem->pgs[i];
-
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for (i = 0; i < umem->npgs; i++) {
+ struct page *page = umem->pgs[i];
- kfree(umem->pgs);
- umem->pgs = NULL;
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ kfree(umem->pgs);
+ umem->pgs = NULL;
}
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
- if (umem->user) {
- atomic_long_sub(umem->npgs, &umem->user->locked_vm);
- free_uid(umem->user);
- }
+ atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+ free_uid(umem->user);
}
static void xdp_umem_release(struct xdp_umem *umem)
@@ -66,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
- if (umem->pgs) {
- xdp_umem_unpin_pages(umem);
-
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
+ xdp_umem_unpin_pages(umem);
- mmput(mm);
- umem->pgs = NULL;
- }
+ task = get_pid_task(umem->pid, PIDTYPE_PID);
+ put_pid(umem->pid);
+ if (!task)
+ goto out;
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
+ mmput(mm);
xdp_umem_unaccount_pages(umem);
out:
kfree(umem);
@@ -96,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem)
{
- atomic_inc(&umem->users);
+ refcount_inc(&umem->users);
}
void xdp_put_umem(struct xdp_umem *umem)
@@ -104,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem)
return;
- if (atomic_dec_and_test(&umem->users)) {
+ if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work);
}
@@ -167,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp;
int size_chk, err;
- if (!umem)
- return -EINVAL;
-
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or*
@@ -227,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp);
- atomic_set(&umem->users, 1);
+ refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
if (err)
@@ -245,6 +224,24 @@ out:
return err;
}
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
+{
+ struct xdp_umem *umem;
+ int err;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ err = xdp_umem_reg(umem, mr);
+ if (err) {
+ kfree(umem);
+ return ERR_PTR(err);
+ }
+
+ return umem;
+}
+
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
return umem->fq && umem->cq;
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 70fe225baa51..0881cf456230 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -27,7 +27,7 @@ struct xdp_umem {
struct pid *pid;
unsigned long address;
size_t size;
- atomic_t users;
+ refcount_t users;
struct work_struct work;
};
@@ -50,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
}
bool xdp_umem_validate_queues(struct xdp_umem *umem);
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
-int xdp_umem_create(struct xdp_umem **umem);
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 817340f7725d..cce0e4f8a536 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -142,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
+ if (xs->queue_id >= xs->dev->real_num_tx_queues) {
+ err = -ENXIO;
+ goto out;
+ }
+
skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@@ -223,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q)
return -ENOMEM;
+ /* Make sure queue is ready before it can be seen by others */
+ smp_wmb();
*queue = q;
return 0;
}
-static void __xsk_release(struct xdp_sock *xs)
-{
- /* Wait for driver to stop using the xdp socket. */
- synchronize_net();
-
- dev_put(xs->dev);
-}
-
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -251,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
- __xsk_release(xs);
+ /* Wait for driver to stop using the xdp socket. */
+ synchronize_net();
+ dev_put(xs->dev);
xs->dev = NULL;
}
@@ -285,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk;
- struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk);
- struct xdp_umem *old_umem = NULL;
+ struct net_device *dev;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@@ -296,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
mutex_lock(&xs->mutex);
- dev_curr = xs->dev;
+ if (xs->dev) {
+ err = -EBUSY;
+ goto out_release;
+ }
+
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) {
err = -ENODEV;
@@ -308,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
- if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+ if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
+ (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
@@ -343,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
xdp_get_umem(umem_xs->umem);
- old_umem = xs->umem;
xs->umem = umem_xs->umem;
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
@@ -355,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props);
}
- /* Rebind? */
- if (dev_curr && (dev_curr != dev ||
- xs->queue_id != sxdp->sxdp_queue_id)) {
- __xsk_release(xs);
- if (old_umem)
- xdp_put_umem(old_umem);
- }
-
xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id;
@@ -410,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr;
struct xdp_umem *umem;
- if (xs->umem)
- return -EBUSY;
-
if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT;
mutex_lock(&xs->mutex);
- err = xdp_umem_create(&umem);
+ if (xs->umem) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
- err = xdp_umem_reg(umem, &mr);
- if (err) {
- kfree(umem);
+ umem = xdp_umem_create(&mr);
+ if (IS_ERR(umem)) {
mutex_unlock(&xs->mutex);
- return err;
+ return PTR_ERR(umem);
}
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
-
xs->umem = umem;
mutex_unlock(&xs->mutex);
return 0;
@@ -439,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
- if (!xs->umem)
- return -EINVAL;
-
if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
+ if (!xs->umem) {
+ mutex_unlock(&xs->mutex);
+ return -EINVAL;
+ }
+
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
@@ -495,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+ case XDP_MMAP_OFFSETS:
+ {
+ struct xdp_mmap_offsets off;
+
+ if (len < sizeof(off))
+ return -EINVAL;
+
+ off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
+ off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
+
+ off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ off.fr.desc = offsetof(struct xdp_umem_ring, desc);
+ off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ off.cr.desc = offsetof(struct xdp_umem_ring, desc);
+
+ len = sizeof(off);
+ if (copy_to_user(optval, &off, len))
+ return -EFAULT;
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+ }
default:
break;
}
@@ -509,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
+ struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) {
- q = xs->rx;
+ q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
- q = xs->tx;
+ q = READ_ONCE(xs->tx);
} else {
- if (!xs->umem)
+ umem = READ_ONCE(xs->umem);
+ if (!umem)
return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING)
- q = xs->umem->fq;
+ q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
- q = xs->umem->cq;
+ q = READ_ONCE(umem->cq);
}
if (!q)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 62e43be407d8..cb8e5be35110 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -13,6 +13,23 @@
#define RX_BATCH_SIZE 16
+struct xdp_ring {
+ u32 producer ____cacheline_aligned_in_smp;
+ u32 consumer ____cacheline_aligned_in_smp;
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+ struct xdp_ring ptrs;
+ struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+ struct xdp_ring ptrs;
+ u32 desc[0] ____cacheline_aligned_in_smp;
+};
+
struct xsk_queue {
struct xdp_umem_props umem_props;
u32 ring_mask;
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 60a882a2296c..e379eac034ac 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -79,7 +79,10 @@ struct xdp_umem_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_umem_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ u32 *ring;
+ void *map;
};
struct xdp_umem {
@@ -94,7 +97,10 @@ struct xdp_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_rxtx_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ struct xdp_desc *ring;
+ void *map;
};
struct xdpsock {
@@ -155,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer;
+ q->cached_cons = *q->consumer;
return q->size - (q->cached_prod - q->cached_cons);
}
@@ -168,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer + q->size;
+ q->cached_cons = *q->consumer + q->size;
return q->cached_cons - q->cached_prod;
}
@@ -177,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -189,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -208,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i].idx;
+ fq->ring[idx] = d[i].idx;
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -229,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i];
+ fq->ring[idx] = d[i];
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -248,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
- d[i] = cq->ring->desc[idx];
+ d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
- cq->ring->ptrs.consumer = cq->cached_cons;
+ *cq->consumer = cq->cached_cons;
}
return entries;
@@ -270,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
const struct xdp_desc *descs,
unsigned int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -279,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- r->desc[idx].idx = descs[i].idx;
- r->desc[idx].len = descs[i].len;
- r->desc[idx].offset = descs[i].offset;
+ r[idx].idx = descs[i].idx;
+ r[idx].len = descs[i].len;
+ r[idx].offset = descs[i].offset;
}
u_smp_wmb();
- r->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
__u32 idx, unsigned int ndescs)
{
- struct xdp_rxtx_ring *q = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -302,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- q->desc[idx].idx = idx + i;
- q->desc[idx].len = sizeof(pkt_data) - 1;
- q->desc[idx].offset = 0;
+ r[idx].idx = idx + i;
+ r[idx].len = sizeof(pkt_data) - 1;
+ r[idx].offset = 0;
}
u_smp_wmb();
- q->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
@@ -317,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
struct xdp_desc *descs,
int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
@@ -327,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
- descs[i] = r->desc[idx];
+ descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
- r->ptrs.consumer = uq->cached_cons;
+ *uq->consumer = uq->cached_cons;
}
return entries;
@@ -392,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+ struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
+ socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem));
@@ -413,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
- umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
- FQ_NUM_DESCS * sizeof(u32),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_UMEM_PGOFF_FILL_RING);
- lassert(umem->fq.ring != MAP_FAILED);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
+
+ umem->fq.map = mmap(0, off.fr.desc +
+ FQ_NUM_DESCS * sizeof(u32),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
+ umem->fq.producer = umem->fq.map + off.fr.producer;
+ umem->fq.consumer = umem->fq.map + off.fr.consumer;
+ umem->fq.ring = umem->fq.map + off.fr.desc;
- umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+ umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
- lassert(umem->cq.ring != MAP_FAILED);
+ lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
+ umem->cq.producer = umem->cq.map + off.cr.producer;
+ umem->cq.consumer = umem->cq.map + off.cr.consumer;
+ umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = (char (*)[FRAME_SIZE])bufs;
umem->fd = sfd;
@@ -449,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
int sfd, ndescs = NUM_DESCS;
struct xdpsock *xsk;
bool shared = true;
+ socklen_t optlen;
u32 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
@@ -474,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
&ndescs, sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
&ndescs, sizeof(int)) == 0);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
/* Rx */
- xsk->rx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_RX_RING);
- lassert(xsk->rx.ring != MAP_FAILED);
+ xsk->rx.map = mmap(NULL,
+ off.rx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_RX_RING);
+ lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
for (i = 0; i < NUM_DESCS / 2; i++)
@@ -491,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
}
/* Tx */
- xsk->tx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_TX_RING);
- lassert(xsk->tx.ring != MAP_FAILED);
+ xsk->tx.map = mmap(NULL,
+ off.tx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_TX_RING);
+ lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
+ xsk->rx.producer = xsk->rx.map + off.rx.producer;
+ xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
+ xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
+ xsk->tx.producer = xsk->tx.map + off.tx.producer;
+ xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
+ xsk->tx.ring = xsk->tx.map + off.tx.desc;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;