diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 10:05:34 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 10:05:34 -0700 |
commit | 1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe (patch) | |
tree | 552e03de245cdbd0780ca1215914edc4a26540f7 /net/rds/ib_cm.c | |
parent | 6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f (diff) | |
parent | fe30937b65354c7fec244caebbdaae68e28ca797 (diff) | |
download | lwn-1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe.tar.gz lwn-1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support more Realtek wireless chips, from Jes Sorenson.
2) New BPF types for per-cpu hash and arrap maps, from Alexei
Starovoitov.
3) Make several TCP sysctls per-namespace, from Nikolay Borisov.
4) Allow the use of SO_REUSEPORT in order to do per-thread processing
of incoming TCP/UDP connections. The muxing can be done using a
BPF program which hashes the incoming packet. From Craig Gallek.
5) Add a multiplexer for TCP streams, to provide a messaged based
interface. BPF programs can be used to determine the message
boundaries. From Tom Herbert.
6) Add 802.1AE MACSEC support, from Sabrina Dubroca.
7) Avoid factorial complexity when taking down an inetdev interface
with lots of configured addresses. We were doing things like
traversing the entire address less for each address removed, and
flushing the entire netfilter conntrack table for every address as
well.
8) Add and use SKB bulk free infrastructure, from Jesper Brouer.
9) Allow offloading u32 classifiers to hardware, and implement for
ixgbe, from John Fastabend.
10) Allow configuring IRQ coalescing parameters on a per-queue basis,
from Kan Liang.
11) Extend ethtool so that larger link mode masks can be supported.
From David Decotigny.
12) Introduce devlink, which can be used to configure port link types
(ethernet vs Infiniband, etc.), port splitting, and switch device
level attributes as a whole. From Jiri Pirko.
13) Hardware offload support for flower classifiers, from Amir Vadai.
14) Add "Local Checksum Offload". Basically, for a tunneled packet
the checksum of the outer header is 'constant' (because with the
checksum field filled into the inner protocol header, the payload
of the outer frame checksums to 'zero'), and we can take advantage
of that in various ways. From Edward Cree"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
bonding: fix bond_get_stats()
net: bcmgenet: fix dma api length mismatch
net/mlx4_core: Fix backward compatibility on VFs
phy: mdio-thunder: Fix some Kconfig typos
lan78xx: add ndo_get_stats64
lan78xx: handle statistics counter rollover
RDS: TCP: Remove unused constant
RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
net: smc911x: convert pxa dma to dmaengine
team: remove duplicate set of flag IFF_MULTICAST
bonding: remove duplicate set of flag IFF_MULTICAST
net: fix a comment typo
ethernet: micrel: fix some error codes
ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
bpf, dst: add and use dst_tclassid helper
bpf: make skb->tc_classid also readable
net: mvneta: bm: clarify dependencies
cls_bpf: reset class and reuse major in da
ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
ldmvsw: Add ldmvsw.c driver code
...
Diffstat (limited to 'net/rds/ib_cm.c')
-rw-r--r-- | net/rds/ib_cm.c | 59 |
1 files changed, 42 insertions, 17 deletions
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index da5a7fb98c77..8764970f0c24 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -236,12 +236,10 @@ static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context) tasklet_schedule(&ic->i_recv_tasklet); } -static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq, - struct ib_wc *wcs, - struct rds_ib_ack_state *ack_state) +static void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq, + struct ib_wc *wcs) { - int nr; - int i; + int nr, i; struct ib_wc *wc; while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { @@ -251,10 +249,12 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq, (unsigned long long)wc->wr_id, wc->status, wc->byte_len, be32_to_cpu(wc->ex.imm_data)); - if (wc->wr_id & RDS_IB_SEND_OP) + if (wc->wr_id <= ic->i_send_ring.w_nr || + wc->wr_id == RDS_IB_ACK_WR_ID) rds_ib_send_cqe_handler(ic, wc); else - rds_ib_recv_cqe_handler(ic, wc, ack_state); + rds_ib_mr_cqe_handler(ic, wc); + } } } @@ -263,14 +263,12 @@ static void rds_ib_tasklet_fn_send(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; struct rds_connection *conn = ic->conn; - struct rds_ib_ack_state state; rds_ib_stats_inc(s_ib_tasklet_call); - memset(&state, 0, sizeof(state)); - poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); + poll_scq(ic, ic->i_send_cq, ic->i_send_wc); ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); - poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); + poll_scq(ic, ic->i_send_cq, ic->i_send_wc); if (rds_conn_up(conn) && (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || @@ -278,6 +276,25 @@ static void rds_ib_tasklet_fn_send(unsigned long data) rds_send_xmit(ic->conn); } +static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq, + struct ib_wc *wcs, + struct rds_ib_ack_state *ack_state) +{ + int nr, i; + struct ib_wc *wc; + + while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { + for (i = 0; i < nr; i++) { + wc = wcs + i; + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc->wr_id, wc->status, + wc->byte_len, be32_to_cpu(wc->ex.imm_data)); + + rds_ib_recv_cqe_handler(ic, wc, ack_state); + } + } +} + static void rds_ib_tasklet_fn_recv(unsigned long data) { struct rds_ib_connection *ic = (struct rds_ib_connection *)data; @@ -291,9 +308,9 @@ static void rds_ib_tasklet_fn_recv(unsigned long data) rds_ib_stats_inc(s_ib_tasklet_call); memset(&state, 0, sizeof(state)); - poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); + poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); - poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); + poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); if (state.ack_next_valid) rds_ib_set_ack(ic, state.ack_next, state.ack_required); @@ -351,7 +368,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct ib_qp_init_attr attr; struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; - int ret; + int ret, fr_queue_space; /* * It's normal to see a null device if an incoming connection races @@ -361,6 +378,12 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!rds_ibdev) return -EOPNOTSUPP; + /* The fr_queue_space is currently set to 512, to add extra space on + * completion queue and send queue. This extra space is used for FRMR + * registration and invalidation work requests + */ + fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); + /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); @@ -372,7 +395,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; - cq_attr.cqe = ic->i_send_ring.w_nr + 1; + cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, rds_ib_cq_event_handler, conn, @@ -412,7 +435,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ - attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; + attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1; attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; @@ -420,6 +443,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; + atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); /* * XXX this can fail if max_*_wr is too large? Are we supposed @@ -739,7 +763,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) */ wait_event(rds_ib_ring_empty_wait, rds_ib_ring_empty(&ic->i_recv_ring) && - (atomic_read(&ic->i_signaled_sends) == 0)); + (atomic_read(&ic->i_signaled_sends) == 0) && + (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); tasklet_kill(&ic->i_send_tasklet); tasklet_kill(&ic->i_recv_tasklet); |